Commit 98131ca1, authored by Maggie Chen, committed by Commit Bot

Remove the GPU watchdog V1 code

GPU watchdog V2 has been enabled for months. It's time to clean up V1.

Merge class GpuWatchdogThreadImplV2 and class GpuWatchdogThread, and use
GpuWatchdogThread as default.

Bug: 949839
Change-Id: Ic0d784bf4f3bcf79e60b71389c42a824429f83e5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2455366
Reviewed-by: Zhenyao Mo <zmo@chromium.org>
Commit-Queue: Maggie Chen <magchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#814743}
parent 0d3b1f9c
......@@ -69,14 +69,6 @@ const base::Feature kGpuUseDisplayThreadPriority{
#endif
};
// Gpu watchdog V2 to simplify the logic and reduce GPU hangs
const base::Feature kGpuWatchdogV2{"GpuWatchdogV2",
base::FEATURE_ENABLED_BY_DEFAULT};
// Use a different set of watchdog timeouts on V1
const base::Feature kGpuWatchdogV1NewTimeout{"GpuWatchdogV1NewTimeout",
base::FEATURE_ENABLED_BY_DEFAULT};
// Use a different set of watchdog timeouts on V2
const base::Feature kGpuWatchdogV2NewTimeout{"GpuWatchdogV2NewTimeout",
base::FEATURE_DISABLED_BY_DEFAULT};
......
......@@ -34,10 +34,6 @@ GPU_EXPORT extern const base::Feature kDirectCompositionUseOverlayDamageList;
GPU_EXPORT extern const base::Feature kGpuUseDisplayThreadPriority;
GPU_EXPORT extern const base::Feature kGpuWatchdogV2;
GPU_EXPORT extern const base::Feature kGpuWatchdogV1NewTimeout;
GPU_EXPORT extern const base::Feature kGpuWatchdogV2NewTimeout;
#if defined(OS_MAC)
......
......@@ -37,8 +37,6 @@ component("service") {
"gpu_memory_buffer_factory.h",
"gpu_watchdog_thread.cc",
"gpu_watchdog_thread.h",
"gpu_watchdog_thread_v2.cc",
"gpu_watchdog_thread_v2.h",
"image_decode_accelerator_stub.cc",
"image_decode_accelerator_stub.h",
"image_decode_accelerator_worker.h",
......
......@@ -27,7 +27,6 @@
#include "gpu/config/gpu_switching.h"
#include "gpu/config/gpu_util.h"
#include "gpu/ipc/service/gpu_watchdog_thread.h"
#include "gpu/ipc/service/gpu_watchdog_thread_v2.h"
#include "ui/base/ui_base_features.h"
#include "ui/gfx/switches.h"
#include "ui/gl/buildflags.h"
......@@ -275,14 +274,9 @@ bool GpuInit::InitializeAndStartSandbox(base::CommandLine* command_line,
// Start the GPU watchdog only after anything that is expected to be time
// consuming has completed, otherwise the process is liable to be aborted.
if (enable_watchdog && !delayed_watchdog_enable) {
if (base::FeatureList::IsEnabled(features::kGpuWatchdogV2)) {
watchdog_thread_ = GpuWatchdogThreadImplV2::Create(
gpu_preferences_.watchdog_starts_backgrounded);
watchdog_init.SetGpuWatchdogPtr(watchdog_thread_.get());
} else {
watchdog_thread_ = GpuWatchdogThreadImplV1::Create(
gpu_preferences_.watchdog_starts_backgrounded);
}
watchdog_thread_ = GpuWatchdogThread::Create(
gpu_preferences_.watchdog_starts_backgrounded);
watchdog_init.SetGpuWatchdogPtr(watchdog_thread_.get());
#if defined(OS_WIN)
// This is a workaround for an occasional deadlock between watchdog and
......@@ -569,14 +563,9 @@ bool GpuInit::InitializeAndStartSandbox(base::CommandLine* command_line,
watchdog_thread_ = nullptr;
watchdog_init.SetGpuWatchdogPtr(nullptr);
} else if (enable_watchdog && delayed_watchdog_enable) {
if (base::FeatureList::IsEnabled(features::kGpuWatchdogV2)) {
watchdog_thread_ = GpuWatchdogThreadImplV2::Create(
gpu_preferences_.watchdog_starts_backgrounded);
watchdog_init.SetGpuWatchdogPtr(watchdog_thread_.get());
} else {
watchdog_thread_ = GpuWatchdogThreadImplV1::Create(
gpu_preferences_.watchdog_starts_backgrounded);
}
watchdog_thread_ = GpuWatchdogThread::Create(
gpu_preferences_.watchdog_starts_backgrounded);
watchdog_init.SetGpuWatchdogPtr(watchdog_thread_.get());
}
UMA_HISTOGRAM_ENUMERATION("GPU.GLImplementation", gl::GetGLImplementation());
......
......@@ -4,111 +4,124 @@
#include "gpu/ipc/service/gpu_watchdog_thread.h"
#include "base/atomicops.h"
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/bit_cast.h"
#include "base/debug/alias.h"
#include "base/debug/dump_without_crashing.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/format_macros.h"
#include "base/logging.h"
#include "base/memory/ptr_util.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_functions.h"
#include "base/native_library.h"
#include "base/numerics/safe_conversions.h"
#include "base/power_monitor/power_monitor.h"
#include "base/process/process.h"
#include "base/single_thread_task_runner.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringprintf.h"
#include "base/system/sys_info.h"
#include "base/task/current_thread.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "gpu/config/gpu_crash_keys.h"
#include "gpu/config/gpu_finch_features.h"
#include "gpu/ipc/common/result_codes.h"
#include "ui/gl/shader_tracking.h"
#if defined(OS_WIN)
#include <windows.h>
#include "base/win/windows_version.h"
#endif
namespace gpu {
namespace {
#if defined(CYGPROFILE_INSTRUMENTATION)
const int kGpuTimeout = 30000;
#elif defined(OS_WIN) || defined(OS_MAC)
// Use a slightly longer timeout on Windows due to prevalence of slow and
// infected machines.
#if defined(OS_WIN)
base::TimeDelta GetGpuWatchdogTimeoutBasedOnCpuCores() {
if (base::win::GetVersion() >= base::win::Version::WIN10) {
int num_of_processors = base::SysInfo::NumberOfProcessors();
if (num_of_processors > 8)
return (kGpuWatchdogTimeout - base::TimeDelta::FromSeconds(10));
else if (num_of_processors <= 4)
return kGpuWatchdogTimeout + base::TimeDelta::FromSeconds(5);
}
// Also use a slightly longer timeout on MacOSX to get rid of GPU process
// hangs at context creation during startup. See https://crbug.com/918490.
const int kGpuTimeout = 15000;
#else
const int kGpuTimeout = 10000;
return kGpuWatchdogTimeout;
}
#endif
// The same set of timeouts from Watchdog V2 so we can compare the results
// between V1 and V2.
#if defined(CYGPROFILE_INSTRUMENTATION)
const int kNewGpuTimeout = 30000;
#elif defined(OS_MAC)
const int kNewGpuTimeout = 17000;
#else
const int kNewGpuTimeout = 15000;
GpuWatchdogThread::GpuWatchdogThread(base::TimeDelta timeout,
int init_factor,
int restart_factor,
int max_extra_cycles_before_kill,
bool is_test_mode)
: base::Thread("GpuWatchdog"),
watchdog_timeout_(timeout),
watchdog_init_factor_(init_factor),
watchdog_restart_factor_(restart_factor),
in_gpu_initialization_(true),
max_extra_cycles_before_kill_(max_extra_cycles_before_kill),
is_test_mode_(is_test_mode),
watched_gpu_task_runner_(base::ThreadTaskRunnerHandle::Get()) {
base::CurrentThread::Get()->AddTaskObserver(this);
num_of_processors_ = base::SysInfo::NumberOfProcessors();
#if defined(OS_WIN)
// GetCurrentThread returns a pseudo-handle that cannot be used by one thread
// to identify another. DuplicateHandle creates a "real" handle that can be
// used for this purpose.
if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
GetCurrentProcess(), &watched_thread_handle_,
THREAD_QUERY_INFORMATION, FALSE, 0)) {
watched_thread_handle_ = nullptr;
}
#endif
#if defined(USE_X11)
const base::FilePath::CharType kTtyFilePath[] =
FILE_PATH_LITERAL("/sys/class/tty/tty0/active");
tty_file_ = base::OpenFile(
base::FilePath(FILE_PATH_LITERAL("/sys/class/tty/tty0/active")), "r");
UpdateActiveTTY();
host_tty_ = active_tty_;
#endif
} // namespace
Arm();
}
GpuWatchdogThreadImplV1::GpuWatchdogThreadImplV1()
: watched_task_runner_(base::ThreadTaskRunnerHandle::Get()),
armed_(false),
task_observer_(this),
use_thread_cpu_time_(true),
responsive_acknowledge_count_(0),
#if defined(OS_WIN)
watched_thread_handle_(0),
arm_cpu_time_(),
#endif
suspension_counter_(this)
#if defined(USE_X11)
,
host_tty_(-1)
#endif
{
if (base::FeatureList::IsEnabled(features::kGpuWatchdogV1NewTimeout))
timeout_ = base::TimeDelta::FromMilliseconds(kNewGpuTimeout);
else
timeout_ = base::TimeDelta::FromMilliseconds(kGpuTimeout);
GpuWatchdogThread::~GpuWatchdogThread() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
// Stop() might take too long and the watchdog timeout is triggered.
// Disarm first before calling Stop() to avoid a crash.
if (IsArmed())
Disarm();
PauseWatchdog();
base::subtle::NoBarrier_Store(&awaiting_acknowledge_, false);
Stop(); // stop the watchdog thread
base::CurrentThread::Get()->RemoveTaskObserver(this);
base::PowerMonitor::RemoveObserver(this);
GpuWatchdogHistogram(GpuWatchdogThreadEvent::kGpuWatchdogEnd);
#if defined(OS_WIN)
// GetCurrentThread returns a pseudo-handle that cannot be used by one thread
// to identify another. DuplicateHandle creates a "real" handle that can be
// used for this purpose.
BOOL result = DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
GetCurrentProcess(), &watched_thread_handle_,
THREAD_QUERY_INFORMATION, FALSE, 0);
DCHECK(result);
if (watched_thread_handle_)
CloseHandle(watched_thread_handle_);
#endif
#if defined(USE_X11)
tty_file_ = base::OpenFile(base::FilePath(kTtyFilePath), "r");
UpdateActiveTTY();
host_tty_ = active_tty_;
if (tty_file_)
fclose(tty_file_);
#endif
base::CurrentThread::Get()->AddTaskObserver(&task_observer_);
}
// static
std::unique_ptr<GpuWatchdogThreadImplV1> GpuWatchdogThreadImplV1::Create(
bool start_backgrounded) {
auto watchdog_thread = base::WrapUnique(new GpuWatchdogThreadImplV1);
std::unique_ptr<GpuWatchdogThread> GpuWatchdogThread::Create(
bool start_backgrounded,
base::TimeDelta timeout,
int init_factor,
int restart_factor,
int max_extra_cycles_before_kill,
bool is_test_mode) {
auto watchdog_thread = base::WrapUnique(
new GpuWatchdogThread(timeout, init_factor, restart_factor,
max_extra_cycles_before_kill, is_test_mode));
base::Thread::Options options;
options.timer_slack = base::TIMER_SLACK_MAXIMUM;
watchdog_thread->StartWithOptions(options);
......@@ -117,346 +130,567 @@ std::unique_ptr<GpuWatchdogThreadImplV1> GpuWatchdogThreadImplV1::Create(
return watchdog_thread;
}
void GpuWatchdogThreadImplV1::CheckArmed() {
base::subtle::NoBarrier_Store(&awaiting_acknowledge_, false);
// static
std::unique_ptr<GpuWatchdogThread> GpuWatchdogThread::Create(
bool start_backgrounded) {
base::TimeDelta gpu_watchdog_timeout = kGpuWatchdogTimeout;
int init_factor = kInitFactor;
int restart_factor = kRestartFactor;
int max_extra_cycles_before_kill = kMaxExtraCyclesBeforeKill;
if (base::FeatureList::IsEnabled(features::kGpuWatchdogV2NewTimeout)) {
const char kNewTimeOutParam[] = "new_time_out";
const char kMaxExtraCyclesBeforeKillParam[] =
"max_extra_cycles_before_kill";
#if defined(OS_WIN)
// The purpose of finch on Windows is to know the impact of the number of
// CPU cores while the rest of platforms are to try a different watchdog
// timeout length.
gpu_watchdog_timeout = GetGpuWatchdogTimeoutBasedOnCpuCores();
constexpr int kFinchMaxExtraCyclesBeforeKill = 0;
#elif defined(OS_ANDROID)
constexpr int kFinchMaxExtraCyclesBeforeKill = 0;
init_factor = kInitFactorFinch;
restart_factor = kRestartFactorFinch;
#elif defined(OS_MAC)
constexpr int kFinchMaxExtraCyclesBeforeKill = 1;
#else
constexpr int kFinchMaxExtraCyclesBeforeKill = 2;
#endif
int timeout = base::GetFieldTrialParamByFeatureAsInt(
features::kGpuWatchdogV2NewTimeout, kNewTimeOutParam,
gpu_watchdog_timeout.InSeconds());
gpu_watchdog_timeout = base::TimeDelta::FromSeconds(timeout);
max_extra_cycles_before_kill = base::GetFieldTrialParamByFeatureAsInt(
features::kGpuWatchdogV2NewTimeout, kMaxExtraCyclesBeforeKillParam,
kFinchMaxExtraCyclesBeforeKill);
}
return Create(start_backgrounded, gpu_watchdog_timeout, init_factor,
restart_factor, max_extra_cycles_before_kill, false);
}
void GpuWatchdogThreadImplV1::ReportProgress() {
CheckArmed();
// Asks the watchdog thread to register this object as a power observer.
// Must be called on the watched GPU thread (DCHECKed below).
// Do not add the power observer during watchdog init; PowerMonitor might not
// be up and running yet.
void GpuWatchdogThread::AddPowerObserver() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
// Forward it to the watchdog thread. Call PowerMonitor::AddObserver on the
// watchdog thread so that OnSuspend and OnResume will be called on the
// watchdog thread.
// Record that the request was made before the task is posted.
is_add_power_observer_called_ = true;
// NOTE(review): base::Unretained(this) is presumably safe because the
// destructor calls Stop() on the watchdog thread before the object goes
// away -- confirm.
task_runner()->PostTask(FROM_HERE,
base::BindOnce(&GpuWatchdogThread::OnAddPowerObserver,
base::Unretained(this)));
}
void GpuWatchdogThreadImplV1::OnBackgrounded() {
// As we stop the task runner before destroying this class, the unretained
// reference will always outlive the task.
// Android Chrome goes to the background. Called from the gpu thread.
void GpuWatchdogThread::OnBackgrounded() {
task_runner()->PostTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV1::OnBackgroundedOnWatchdogThread,
base::Unretained(this)));
base::BindOnce(&GpuWatchdogThread::StopWatchdogTimeoutTask,
base::Unretained(this), kAndroidBackgroundForeground));
}
void GpuWatchdogThreadImplV1::OnForegrounded() {
// As we stop the task runner before destroying this class, the unretained
// reference will always outlive the task.
// Android Chrome goes to the foreground. Called from the gpu thread.
void GpuWatchdogThread::OnForegrounded() {
task_runner()->PostTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV1::OnForegroundedOnWatchdogThread,
base::Unretained(this)));
base::BindOnce(&GpuWatchdogThread::RestartWatchdogTimeoutTask,
base::Unretained(this), kAndroidBackgroundForeground));
}
bool GpuWatchdogThreadImplV1::IsGpuHangDetectedForTesting() {
return false;
}
// Called from the gpu thread when gpu init has completed.
void GpuWatchdogThread::OnInitComplete() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
void GpuWatchdogThreadImplV1::Init() {
// Schedule the first check.
OnCheck(false);
task_runner()->PostTask(
FROM_HERE, base::BindOnce(&GpuWatchdogThread::UpdateInitializationFlag,
base::Unretained(this)));
Disarm();
}
void GpuWatchdogThreadImplV1::CleanUp() {
weak_factory_.InvalidateWeakPtrs();
armed_ = false;
}
// Called from the gpu thread in viz::GpuServiceImpl::~GpuServiceImpl().
// After this, no Disarm() will be called before the watchdog thread is
// destroyed. If this destruction takes too long, the watchdog timeout
// will be triggered.
void GpuWatchdogThread::OnGpuProcessTearDown() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
GpuWatchdogThreadImplV1::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
GpuWatchdogThreadImplV1* watchdog)
: watchdog_(watchdog) {}
in_gpu_process_teardown_ = true;
if (!IsArmed())
Arm();
}
GpuWatchdogThreadImplV1::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() =
default;
// Called from the gpu main thread.
void GpuWatchdogThread::PauseWatchdog() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
void GpuWatchdogThreadImplV1::GpuWatchdogTaskObserver::WillProcessTask(
const base::PendingTask& pending_task,
bool was_blocked_or_low_priority) {
watchdog_->CheckArmed();
task_runner()->PostTask(
FROM_HERE, base::BindOnce(&GpuWatchdogThread::StopWatchdogTimeoutTask,
base::Unretained(this), kGeneralGpuFlow));
}
void GpuWatchdogThreadImplV1::GpuWatchdogTaskObserver::DidProcessTask(
const base::PendingTask& pending_task) {}
// Called from the gpu main thread.
void GpuWatchdogThread::ResumeWatchdog() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
GpuWatchdogThreadImplV1::SuspensionCounter::SuspensionCounterRef::
SuspensionCounterRef(SuspensionCounter* counter)
: counter_(counter) {
counter_->OnAddRef();
task_runner()->PostTask(
FROM_HERE, base::BindOnce(&GpuWatchdogThread::RestartWatchdogTimeoutTask,
base::Unretained(this), kGeneralGpuFlow));
}
GpuWatchdogThreadImplV1::SuspensionCounter::SuspensionCounterRef::
~SuspensionCounterRef() {
counter_->OnReleaseRef();
// Running on the watchdog thread.
// On Linux, Init() will be called twice for Sandbox Initialization. The
// watchdog is stopped and then restarted in StartSandboxLinux(). Everything
// should be the same and continue after the second init().
//
// Schedules the first OnWatchdogTimeout() check and records the baseline
// (arm/disarm counter and timestamps) that later checks compare against.
void GpuWatchdogThread::Init() {
// Remember the watchdog thread's task runner; other methods DCHECK against it.
watchdog_thread_task_runner_ = base::ThreadTaskRunnerHandle::Get();
// Get and Invalidate weak_ptr should be done on the watchdog thread only.
weak_ptr_ = weak_factory_.GetWeakPtr();
// The first timeout is the base timeout scaled by the init factor.
// NOTE(review): this uses the constant kInitFactor, while
// RestartWatchdogTimeoutTask() uses the |watchdog_init_factor_| member that
// the constructor stores. Confirm whether ignoring the member here is
// intentional.
base::TimeDelta timeout = watchdog_timeout_ * kInitFactor;
// Bound to |weak_ptr_| so that invalidating the weak pointers revokes the
// pending check.
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThread::OnWatchdogTimeout, weak_ptr_),
timeout);
// Snapshot the counter; OnWatchdogTimeout() treats a changed counter as
// progress on the GPU thread.
last_arm_disarm_counter_ = ReadArmDisarmCounter();
watchdog_start_timeticks_ = base::TimeTicks::Now();
last_on_watchdog_timeout_timeticks_ = watchdog_start_timeticks_;
// Expected wall-clock time of the first check; SlowWatchdogThread() compares
// the actual wake-up time against this.
next_on_watchdog_timeout_time_ = base::Time::Now() + timeout;
#if defined(OS_WIN)
// Only when the constructor managed to duplicate the watched thread's handle.
if (watched_thread_handle_) {
if (base::ThreadTicks::IsSupported())
base::ThreadTicks::WaitUntilInitialized();
// Baseline of the watched thread's time, and the thread-time budget for the
// first timeout period.
last_on_watchdog_timeout_thread_ticks_ = GetWatchedThreadTime();
remaining_watched_thread_ticks_ = timeout;
}
#endif
}
GpuWatchdogThreadImplV1::SuspensionCounter::SuspensionCounter(
GpuWatchdogThreadImplV1* watchdog_thread)
: watchdog_thread_(watchdog_thread) {
// This class will only be used on the watchdog thread, but is constructed on
// the main thread. Detach.
DETACH_FROM_SEQUENCE(watchdog_thread_sequence_checker_);
// Running on the watchdog thread.
// Invalidates the weak pointers handed out by |weak_factory_|. Pending
// OnWatchdogTimeout() tasks are bound to such a weak pointer, so this revokes
// them.
void GpuWatchdogThread::CleanUp() {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
weak_factory_.InvalidateWeakPtrs();
}
std::unique_ptr<
GpuWatchdogThreadImplV1::SuspensionCounter::SuspensionCounterRef>
GpuWatchdogThreadImplV1::SuspensionCounter::Take() {
DCHECK_CALLED_ON_VALID_SEQUENCE(watchdog_thread_sequence_checker_);
return std::make_unique<SuspensionCounterRef>(this);
// Called on the watched GPU thread (DCHECKed below) to signal that the thread
// is still making progress. Delegates to InProgress(), which advances the
// arm/disarm counter by two and leaves the watchdog armed.
void GpuWatchdogThread::ReportProgress() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
InProgress();
}
bool GpuWatchdogThreadImplV1::SuspensionCounter::HasRefs() const {
DCHECK_CALLED_ON_VALID_SEQUENCE(watchdog_thread_sequence_checker_);
return suspend_count_ > 0;
// Task-observer callback on the watched GPU thread (DCHECKed below); this
// object registers itself via AddTaskObserver() in the constructor.
// Arms the watchdog, except during GPU process teardown: there the watchdog
// was armed when teardown began, so it is only checked to still be armed.
// Neither parameter is used.
void GpuWatchdogThread::WillProcessTask(const base::PendingTask& pending_task,
                                        bool was_blocked_or_low_priority) {
  DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());

  if (!in_gpu_process_teardown_) {
    Arm();
    return;
  }

  // The watchdog is armed at the beginning of the gpu process teardown.
  // Do not call Arm() during teardown.
  DCHECK(IsArmed());
}
void GpuWatchdogThreadImplV1::SuspensionCounter::OnWatchdogThreadStopped() {
DETACH_FROM_SEQUENCE(watchdog_thread_sequence_checker_);
void GpuWatchdogThread::DidProcessTask(const base::PendingTask& pending_task) {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
// Null the |watchdog_thread_| ptr at shutdown to avoid trying to suspend or
// resume after the thread is stopped.
watchdog_thread_ = nullptr;
// Keep the watchdog armed during tear down.
if (in_gpu_process_teardown_)
InProgress();
else
Disarm();
}
void GpuWatchdogThreadImplV1::SuspensionCounter::OnAddRef() {
DCHECK_CALLED_ON_VALID_SEQUENCE(watchdog_thread_sequence_checker_);
suspend_count_++;
if (watchdog_thread_ && suspend_count_ == 1)
watchdog_thread_->SuspendStateChanged();
// Power suspends. Running on the watchdog thread.
// Stops the watchdog timeout task, tagging the request as coming from a
// power suspend/resume event.
void GpuWatchdogThread::OnSuspend() {
StopWatchdogTimeoutTask(kPowerSuspendResume);
}
void GpuWatchdogThreadImplV1::SuspensionCounter::OnReleaseRef() {
DCHECK_CALLED_ON_VALID_SEQUENCE(watchdog_thread_sequence_checker_);
DCHECK_GT(suspend_count_, 0u);
suspend_count_--;
if (watchdog_thread_ && suspend_count_ == 0)
watchdog_thread_->SuspendStateChanged();
// Power resumes. Running on the watchdog thread.
// Restarts the watchdog timeout task, tagging the request as coming from a
// power suspend/resume event.
void GpuWatchdogThread::OnResume() {
RestartWatchdogTimeoutTask(kPowerSuspendResume);
}
GpuWatchdogThreadImplV1::~GpuWatchdogThreadImplV1() {
DCHECK(watched_task_runner_->BelongsToCurrentThread());
// Running on the watchdog thread.
void GpuWatchdogThread::OnAddPowerObserver() {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
DCHECK(base::PowerMonitor::IsInitialized());
Stop();
suspension_counter_.OnWatchdogThreadStopped();
base::PowerMonitor::AddObserver(this);
is_power_observer_added_ = true;
}
// Running on the watchdog thread.
void GpuWatchdogThread::RestartWatchdogTimeoutTask(
PauseResumeSource source_of_request) {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
base::TimeDelta timeout;
switch (source_of_request) {
case kAndroidBackgroundForeground:
if (!is_backgrounded_)
return;
is_backgrounded_ = false;
timeout = watchdog_timeout_ * watchdog_restart_factor_;
foregrounded_timeticks_ = base::TimeTicks::Now();
foregrounded_event_ = true;
num_of_timeout_after_foregrounded_ = 0;
break;
case kPowerSuspendResume:
if (!in_power_suspension_)
return;
in_power_suspension_ = false;
timeout = watchdog_timeout_ * watchdog_restart_factor_;
power_resume_timeticks_ = base::TimeTicks::Now();
power_resumed_event_ = true;
num_of_timeout_after_power_resume_ = 0;
break;
case kGeneralGpuFlow:
if (!is_paused_)
return;
is_paused_ = false;
timeout = watchdog_timeout_ * watchdog_init_factor_;
watchdog_resume_timeticks_ = base::TimeTicks::Now();
break;
}
if (!is_backgrounded_ && !in_power_suspension_ && !is_paused_) {
weak_ptr_ = weak_factory_.GetWeakPtr();
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThread::OnWatchdogTimeout, weak_ptr_),
timeout);
last_on_watchdog_timeout_timeticks_ = base::TimeTicks::Now();
next_on_watchdog_timeout_time_ = base::Time::Now() + timeout;
last_arm_disarm_counter_ = ReadArmDisarmCounter();
#if defined(OS_WIN)
CloseHandle(watched_thread_handle_);
if (watched_thread_handle_) {
last_on_watchdog_timeout_thread_ticks_ = GetWatchedThreadTime();
remaining_watched_thread_ticks_ = timeout;
}
#endif
}
}
base::PowerMonitor::RemoveObserver(this);
void GpuWatchdogThread::StopWatchdogTimeoutTask(
PauseResumeSource source_of_request) {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
switch (source_of_request) {
case kAndroidBackgroundForeground:
if (is_backgrounded_)
return;
is_backgrounded_ = true;
backgrounded_timeticks_ = base::TimeTicks::Now();
foregrounded_event_ = false;
break;
case kPowerSuspendResume:
if (in_power_suspension_)
return;
in_power_suspension_ = true;
power_suspend_timeticks_ = base::TimeTicks::Now();
power_resumed_event_ = false;
break;
case kGeneralGpuFlow:
if (is_paused_)
return;
is_paused_ = true;
watchdog_pause_timeticks_ = base::TimeTicks::Now();
break;
}
#if defined(USE_X11)
if (tty_file_)
fclose(tty_file_);
#endif
// Revoke any pending watchdog timeout task
weak_factory_.InvalidateWeakPtrs();
}
base::CurrentThread::Get()->RemoveTaskObserver(&task_observer_);
// Clears |in_gpu_initialization_|. OnInitComplete() posts this to the
// watchdog thread's task runner once GPU init has completed.
void GpuWatchdogThread::UpdateInitializationFlag() {
in_gpu_initialization_ = false;
}
void GpuWatchdogThreadImplV1::OnAcknowledge() {
CHECK(base::PlatformThread::CurrentId() == GetThreadId());
// Called from the gpu main thread.
// The watchdog is armed only in these three functions -
// GpuWatchdogThread(), WillProcessTask(), and OnGpuProcessTearDown()
void GpuWatchdogThread::Arm() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
// The check has already been acknowledged and another has already been
// scheduled by a previous call to OnAcknowledge. It is normal for a
// watched thread to see armed_ being true multiple times before
// the OnAcknowledge task is run on the watchdog thread.
if (!armed_)
return;
base::subtle::NoBarrier_AtomicIncrement(&arm_disarm_counter_, 1);
// Revoke any pending hang termination.
weak_factory_.InvalidateWeakPtrs();
armed_ = false;
// Arm/Disarm are always called in sequence. Now it's an odd number.
DCHECK(IsArmed());
}
if (suspension_counter_.HasRefs()) {
responsive_acknowledge_count_ = 0;
return;
}
void GpuWatchdogThread::Disarm() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
base::Time current_time = base::Time::Now();
base::subtle::NoBarrier_AtomicIncrement(&arm_disarm_counter_, 1);
// The watchdog waits until at least 6 consecutive checks have returned in
// less than 50 ms before it will start ignoring the CPU time in determining
// whether to timeout. This is a compromise to allow startups that are slow
// due to disk contention to avoid timing out, but once the GPU process is
// running smoothly the watchdog will be able to detect hangs that don't use
// the CPU.
if ((current_time - check_time_) < base::TimeDelta::FromMilliseconds(50))
responsive_acknowledge_count_++;
else
responsive_acknowledge_count_ = 0;
// Arm/Disarm are always called in sequence. Now it's an even number.
DCHECK(!IsArmed());
}
if (responsive_acknowledge_count_ >= 6)
use_thread_cpu_time_ = false;
void GpuWatchdogThread::InProgress() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
// If it took a long time for the acknowledgement, assume the computer was
// recently suspended.
bool was_suspended = (current_time > suspension_timeout_);
// Increment by 2. This is equivalent to Disarm() + Arm().
base::subtle::NoBarrier_AtomicIncrement(&arm_disarm_counter_, 2);
// The monitored thread has responded. Post a task to check it again.
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV1::OnCheck,
weak_factory_.GetWeakPtr(), was_suspended),
0.5 * timeout_);
// Now it's an odd number.
DCHECK(IsArmed());
}
void GpuWatchdogThreadImplV1::OnCheck(bool after_suspend) {
CHECK(base::PlatformThread::CurrentId() == GetThreadId());
// Reports whether the watchdog is currently armed. Arm() and Disarm() each
// advance |arm_disarm_counter_| by one, so an odd value means "armed".
bool GpuWatchdogThread::IsArmed() {
  const auto counter = base::subtle::NoBarrier_Load(&arm_disarm_counter_);
  // The lowest bit is set exactly when the counter is odd.
  return (counter & 1) != 0;
}
// Do not create any new termination tasks if one has already been created
// or the system is suspended.
if (armed_ || suspension_counter_.HasRefs())
return;
// Returns the current value of |arm_disarm_counter_|, read with a no-barrier
// atomic load. Callers compare it with an earlier snapshot to tell whether
// the GPU thread armed or disarmed the watchdog in between.
base::subtle::Atomic32 GpuWatchdogThread::ReadArmDisarmCounter() {
return base::subtle::NoBarrier_Load(&arm_disarm_counter_);
}
armed_ = true;
// Running on the watchdog thread.
void GpuWatchdogThread::OnWatchdogTimeout() {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
DCHECK(!is_backgrounded_);
DCHECK(!in_power_suspension_);
DCHECK(!is_paused_);
// Must set |awaiting_acknowledge_| before posting the task. This task might
// be the only task that will activate the TaskObserver on the watched thread
// and it must not miss the false -> true transition. No barrier is needed
// here, as the PostTask which follows contains a barrier.
base::subtle::NoBarrier_Store(&awaiting_acknowledge_, true);
// If this metric is added too early (eg. watchdog creation time), it cannot
// be persistent. The histogram data will be lost after crash or browser exit.
// Delay the recording of kGpuWatchdogStart until the first
// OnWatchdogTimeout() to ensure this metric is created in the persistent
// memory.
if (!is_watchdog_start_histogram_recorded) {
is_watchdog_start_histogram_recorded = true;
GpuWatchdogHistogram(GpuWatchdogThreadEvent::kGpuWatchdogStart);
}
#if defined(OS_WIN)
arm_cpu_time_ = GetWatchedThreadTime();
auto arm_disarm_counter = ReadArmDisarmCounter();
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kTimeout);
if (power_resumed_event_)
num_of_timeout_after_power_resume_++;
if (foregrounded_event_)
num_of_timeout_after_foregrounded_++;
QueryUnbiasedInterruptTime(&arm_interrupt_time_);
#if defined(USE_X11)
UpdateActiveTTY();
#endif
check_time_ = base::Time::Now();
check_timeticks_ = base::TimeTicks::Now();
// Immediately after the computer is woken up from being suspended it might
// be pretty sluggish, so allow some extra time before the next timeout.
base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1);
suspension_timeout_ = check_time_ + timeout * 2;
// Collect all needed info for gpu hang detection.
bool disarmed = arm_disarm_counter % 2 == 0; // even number
bool gpu_makes_progress = arm_disarm_counter != last_arm_disarm_counter_;
bool no_gpu_hang = disarmed || gpu_makes_progress || SlowWatchdogThread();
// Post a task to the monitored thread that does nothing but wake up the
// TaskObserver. Any other tasks that are pending on the watched thread will
// also wake up the observer. This simply ensures there is at least one.
watched_task_runner_->PostTask(FROM_HERE, base::DoNothing());
bool watched_thread_needs_more_time =
WatchedThreadNeedsMoreThreadTime(no_gpu_hang);
no_gpu_hang = no_gpu_hang || watched_thread_needs_more_time ||
ContinueOnNonHostX11ServerTty();
// Post a task to the watchdog thread to exit if the monitored thread does
// not respond in time.
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV1::OnCheckTimeout,
weak_factory_.GetWeakPtr()),
timeout);
}
bool allows_extra_timeout = WatchedThreadGetsExtraTimeout(no_gpu_hang);
no_gpu_hang = no_gpu_hang || allows_extra_timeout;
// No gpu hang. Continue with another OnWatchdogTimeout task.
if (no_gpu_hang) {
last_on_watchdog_timeout_timeticks_ = base::TimeTicks::Now();
next_on_watchdog_timeout_time_ = base::Time::Now() + watchdog_timeout_;
last_arm_disarm_counter_ = ReadArmDisarmCounter();
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThread::OnWatchdogTimeout, weak_ptr_),
watchdog_timeout_);
return;
}
// Still armed without any progress. GPU possibly hangs.
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kKill);
#if defined(OS_WIN)
if (less_than_full_thread_time_after_capped_)
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kKillOnLessThreadTime);
#endif
void GpuWatchdogThreadImplV1::OnCheckTimeout() {
DeliberatelyTerminateToRecoverFromHang();
}
// Use the --disable-gpu-watchdog command line switch to disable this.
void GpuWatchdogThreadImplV1::DeliberatelyTerminateToRecoverFromHang() {
// Should not get here while the system is suspended.
DCHECK(!suspension_counter_.HasRefs());
bool GpuWatchdogThread::SlowWatchdogThread() {
// If it takes 15 more seconds than the expected time between two
// OnWatchdogTimeout() calls, the system is considered slow and it's not a GPU
// hang.
bool slow_watchdog_thread =
(base::Time::Now() - next_on_watchdog_timeout_time_) >=
base::TimeDelta::FromSeconds(15);
// If the watchdog woke up significantly behind schedule, disarm and reset
// the watchdog check. This is to prevent the watchdog thread from terminating
// when a machine wakes up from sleep or hibernation, which would otherwise
// appear to be a hang.
if (base::Time::Now() > suspension_timeout_) {
OnAcknowledge();
return;
// Record this case only when a GPU hang is detected and the thread is slow.
if (slow_watchdog_thread)
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kSlowWatchdogThread);
return slow_watchdog_thread;
}
bool GpuWatchdogThread::WatchedThreadNeedsMoreThreadTime(
bool no_gpu_hang_detected) {
#if defined(OS_WIN)
if (!watched_thread_handle_)
return false;
// We allow extra thread time. When that runs out, we extend extra timeout
// cycles. Now, we are extending extra timeout cycles. Don't add extra thread
// time.
if (count_of_extra_cycles_ > 0)
return false;
WatchedThreadNeedsMoreThreadTimeHistogram(
no_gpu_hang_detected,
/*start_of_more_thread_time*/ false);
if (!no_gpu_hang_detected && count_of_more_gpu_thread_time_allowed_ >=
kMaxCountOfMoreGpuThreadTimeAllowed) {
less_than_full_thread_time_after_capped_ = true;
} else {
less_than_full_thread_time_after_capped_ = false;
}
if (!base::subtle::NoBarrier_Load(&awaiting_acknowledge_)) {
OnAcknowledge();
return;
// Calculate how many thread ticks the watched thread spent doing the work.
base::ThreadTicks now = GetWatchedThreadTime();
base::TimeDelta thread_time_elapsed =
now - last_on_watchdog_timeout_thread_ticks_;
last_on_watchdog_timeout_thread_ticks_ = now;
remaining_watched_thread_ticks_ -= thread_time_elapsed;
if (no_gpu_hang_detected ||
count_of_more_gpu_thread_time_allowed_ >=
kMaxCountOfMoreGpuThreadTimeAllowed ||
thread_time_elapsed < base::TimeDelta() /* bogus data */ ||
remaining_watched_thread_ticks_ <= base::TimeDelta()) {
// Reset the remaining thread ticks.
remaining_watched_thread_ticks_ = watchdog_timeout_;
count_of_more_gpu_thread_time_allowed_ = 0;
return false;
} else {
// This is the start of allowing more thread time.
if (count_of_more_gpu_thread_time_allowed_ == 0) {
WatchedThreadNeedsMoreThreadTimeHistogram(
no_gpu_hang_detected, /*start_of_more_thread_time*/ true);
}
count_of_more_gpu_thread_time_allowed_++;
return true;
}
#else
return false;
#endif
}
#if defined(OS_WIN)
// Defer termination until a certain amount of CPU time has elapsed on the
// watched thread.
base::ThreadTicks current_cpu_time = GetWatchedThreadTime();
base::TimeDelta time_since_arm = current_cpu_time - arm_cpu_time_;
if (use_thread_cpu_time_ && (time_since_arm < timeout_)) {
base::ThreadTicks GpuWatchdogThread::GetWatchedThreadTime() {
DCHECK(watched_thread_handle_);
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV1::OnCheckTimeout,
weak_factory_.GetWeakPtr()),
timeout_ - time_since_arm);
return;
if (base::ThreadTicks::IsSupported()) {
// Note: GetForThread() might return bogus results if running on different
// CPUs between two calls.
return base::ThreadTicks::GetForThread(
base::PlatformThreadHandle(watched_thread_handle_));
} else {
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
BOOL result = GetThreadTimes(watched_thread_handle_, &creation_time,
&exit_time, &kernel_time, &user_time);
if (!result)
return base::ThreadTicks();
// Need to bit_cast to fix alignment, then divide by 10 to convert
// 100-nanoseconds to microseconds.
int64_t user_time_us = bit_cast<int64_t, FILETIME>(user_time) / 10;
int64_t kernel_time_us = bit_cast<int64_t, FILETIME>(kernel_time) / 10;
return base::ThreadTicks() +
base::TimeDelta::FromMicroseconds(user_time_us + kernel_time_us);
}
}
#endif
// For minimal developer annoyance, don't keep terminating. You need to skip
// the call to base::Process::Terminate below in a debugger for this to be
// useful.
static bool terminated = false;
if (terminated)
// Decides whether the watched thread is granted one more timeout cycle
// instead of being treated as hung right away.
//
// |no_gpu_hang| is true when no hang was detected for the current timeout.
// Returns true only when a hang was detected and fewer than
// |max_extra_cycles_before_kill_| extra cycles have been used so far; in that
// case |count_of_extra_cycles_| is incremented. Always returns false when
// |max_extra_cycles_before_kill_| is 0.
bool GpuWatchdogThread::WatchedThreadGetsExtraTimeout(bool no_gpu_hang) {
  // No extra cycles are configured, so there is nothing to grant or record.
  if (max_extra_cycles_before_kill_ == 0)
    return false;

  // The histograms are recorded whether or not a hang was detected, and
  // before the cycle counter is touched below.
  WatchedThreadGetsExtraTimeoutHistogram(no_gpu_hang);

  if (no_gpu_hang) {
    // Progress was made: forget any extra cycles that were handed out.
    if (count_of_extra_cycles_ > 0)
      count_of_extra_cycles_ = 0;
    return false;
  }

  // A hang was detected but the extra-cycle budget is already used up.
  if (count_of_extra_cycles_ >= max_extra_cycles_before_kill_)
    return false;

  ++count_of_extra_cycles_;
  return true;
}
void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
// If this is for gpu testing, do not terminate the gpu process.
if (is_test_mode_) {
test_result_timeout_and_gpu_hang_.Set();
return;
}
#if defined(OS_WIN)
if (IsDebuggerPresent())
return;
#endif
#if defined(USE_X11)
// Don't crash if we're not on the TTY of our host X11 server.
UpdateActiveTTY();
if (host_tty_ != -1 && active_tty_ != -1 && host_tty_ != active_tty_) {
OnAcknowledge();
return;
}
#endif
// Store variables so they're available in crash dumps to help determine the
// cause of any hang.
// Store variables so they're available in crash dumps to help determine the
// cause of any hang.
base::TimeTicks function_begin_timeticks = base::TimeTicks::Now();
base::debug::Alias(&in_gpu_initialization_);
base::debug::Alias(&num_of_timeout_after_power_resume_);
base::debug::Alias(&num_of_timeout_after_foregrounded_);
base::debug::Alias(&function_begin_timeticks);
base::debug::Alias(&watchdog_start_timeticks_);
base::debug::Alias(&power_suspend_timeticks_);
base::debug::Alias(&power_resume_timeticks_);
base::debug::Alias(&backgrounded_timeticks_);
base::debug::Alias(&foregrounded_timeticks_);
base::debug::Alias(&watchdog_pause_timeticks_);
base::debug::Alias(&watchdog_resume_timeticks_);
base::debug::Alias(&in_power_suspension_);
base::debug::Alias(&in_gpu_process_teardown_);
base::debug::Alias(&is_backgrounded_);
base::debug::Alias(&is_add_power_observer_called_);
base::debug::Alias(&is_power_observer_added_);
base::debug::Alias(&last_on_watchdog_timeout_timeticks_);
base::TimeDelta timeticks_elapses =
function_begin_timeticks - last_on_watchdog_timeout_timeticks_;
base::debug::Alias(&timeticks_elapses);
base::debug::Alias(&max_extra_cycles_before_kill_);
#if defined(OS_WIN)
ULONGLONG fire_interrupt_time;
QueryUnbiasedInterruptTime(&fire_interrupt_time);
// This is the time since the watchdog was armed, in 100ns intervals,
// ignoring time where the computer is suspended.
ULONGLONG interrupt_delay = fire_interrupt_time - arm_interrupt_time_;
base::debug::Alias(&interrupt_delay);
base::debug::Alias(&current_cpu_time);
base::debug::Alias(&time_since_arm);
base::debug::Alias(&remaining_watched_thread_ticks_);
base::debug::Alias(&less_than_full_thread_time_after_capped_);
#endif
bool using_thread_ticks = base::ThreadTicks::IsSupported();
base::debug::Alias(&using_thread_ticks);
GpuWatchdogHistogram(GpuWatchdogThreadEvent::kGpuWatchdogKill);
bool using_high_res_timer = base::TimeTicks::IsHighResolution();
base::debug::Alias(&using_high_res_timer);
#endif
crash_keys::gpu_watchdog_crashed_in_gpu_init.Set(
in_gpu_initialization_ ? "1" : "0");
int32_t awaiting_acknowledge =
base::subtle::NoBarrier_Load(&awaiting_acknowledge_);
base::debug::Alias(&awaiting_acknowledge);
// Don't log the message to stderr in release builds because the buffer
// may be full.
std::string message = base::StringPrintf(
"The GPU process hung. Terminating after %" PRId64 " ms.",
timeout_.InMilliseconds());
logging::LogMessageHandlerFunction handler = logging::GetLogMessageHandler();
if (handler)
handler(logging::LOG_ERROR, __FILE__, __LINE__, 0, message);
DLOG(ERROR) << message;
base::Time current_time = base::Time::Now();
base::TimeTicks current_timeticks = base::TimeTicks::Now();
base::debug::Alias(&current_time);
base::debug::Alias(&current_timeticks);
int64_t available_physical_memory =
base::SysInfo::AmountOfAvailablePhysicalMemory() >> 20;
crash_keys::available_physical_memory_in_mb.Set(
base::NumberToString(available_physical_memory));
gl::ShaderTracking* shader_tracking = gl::ShaderTracking::GetInstance();
if (shader_tracking) {
std::string shaders[2];
shader_tracking->GetShaders(shaders, shaders + 1);
crash_keys::current_shader_0.Set(shaders[0]);
crash_keys::current_shader_1.Set(shaders[1]);
}
crash_keys::gpu_watchdog_kill_after_power_resume.Set(
WithinOneMinFromPowerResumed() ? "1" : "0");
// Check it one last time before crashing.
if (!base::subtle::NoBarrier_Load(&awaiting_acknowledge_)) {
OnAcknowledge();
return;
}
crash_keys::num_of_processors.Set(base::NumberToString(num_of_processors_));
terminated = true;
// Check the arm_disarm_counter value one more time.
auto last_arm_disarm_counter = ReadArmDisarmCounter();
base::debug::Alias(&last_arm_disarm_counter);
// Use RESULT_CODE_HUNG so this crash is separated from other
// EXCEPTION_ACCESS_VIOLATION buckets for UMA analysis.
......@@ -466,90 +700,177 @@ void GpuWatchdogThreadImplV1::DeliberatelyTerminateToRecoverFromHang() {
base::Process::TerminateCurrentProcessImmediately(RESULT_CODE_HUNG);
}
void GpuWatchdogThreadImplV1::AddPowerObserver() {
// As we stop the task runner before destroying this class, the unretained
// reference will always outlive the task.
task_runner()->PostTask(
FROM_HERE, base::BindOnce(&GpuWatchdogThreadImplV1::OnAddPowerObserver,
base::Unretained(this)));
// Records |thread_event| as a sample of the "GPU.WatchdogThread.Event"
// enumeration histogram.
void GpuWatchdogThread::GpuWatchdogHistogram(
    GpuWatchdogThreadEvent thread_event) {
  static constexpr char kEventHistogramName[] = "GPU.WatchdogThread.Event";
  base::UmaHistogramEnumeration(kEventHistogramName, thread_event);
}
// Registers this watchdog as an observer with base::PowerMonitor. The DCHECK
// requires that the PowerMonitor has already been initialized when this runs.
void GpuWatchdogThreadImplV1::OnAddPowerObserver() {
DCHECK(base::PowerMonitor::IsInitialized());
base::PowerMonitor::AddObserver(this);
}
void GpuWatchdogThread::GpuWatchdogTimeoutHistogram(
GpuWatchdogTimeoutEvent timeout_event) {
base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout", timeout_event);
void GpuWatchdogThreadImplV1::OnSuspend() {
power_suspend_ref_ = suspension_counter_.Take();
}
bool recorded = false;
if (in_gpu_initialization_) {
base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout.Init",
timeout_event);
recorded = true;
}
if (WithinOneMinFromPowerResumed()) {
base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout.PowerResume",
timeout_event);
recorded = true;
}
void GpuWatchdogThreadImplV1::OnResume() {
power_suspend_ref_.reset();
if (WithinOneMinFromForegrounded()) {
base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout.Foregrounded",
timeout_event);
recorded = true;
}
if (!recorded) {
base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout.Normal",
timeout_event);
}
}
void GpuWatchdogThreadImplV1::OnBackgroundedOnWatchdogThread() {
background_suspend_ref_ = suspension_counter_.Take();
#if defined(OS_WIN)
// Records how many timeouts the GPU main thread needed before it made
// progress after OnWatchdogTimeout() was triggered, using the current value of
// |count_of_more_gpu_thread_time_allowed_| as the sample.
//
// The sample always goes to "GPU.WatchdogThread.ExtraThreadTime". It is also
// recorded under every matching context suffix (.Init, .PowerResume,
// .Foregrounded), or under .Normal when none of those contexts applies.
void GpuWatchdogThread::RecordExtraThreadTimeHistogram() {
  // The histogram range tops out at 6, which is above
  // kMaxCountOfMoreGpuThreadTimeAllowed.
  constexpr int kMinCount = 1;
  constexpr int kMaxCount = 6;
  constexpr int kNumBuckets = 6;

  const int extra_timeouts = count_of_more_gpu_thread_time_allowed_;
  const auto record_sample = [&](const char* histogram_name) {
    base::UmaHistogramCustomCounts(histogram_name, extra_timeouts, kMinCount,
                                   kMaxCount, kNumBuckets);
  };

  record_sample("GPU.WatchdogThread.ExtraThreadTime");

  // Tracks whether any context-specific histogram received the sample.
  bool matched_context = false;

  if (in_gpu_initialization_) {
    record_sample("GPU.WatchdogThread.ExtraThreadTime.Init");
    matched_context = true;
  }

  if (WithinOneMinFromPowerResumed()) {
    record_sample("GPU.WatchdogThread.ExtraThreadTime.PowerResume");
    matched_context = true;
  }

  if (WithinOneMinFromForegrounded()) {
    record_sample("GPU.WatchdogThread.ExtraThreadTime.Foregrounded");
    matched_context = true;
  }

  if (!matched_context)
    record_sample("GPU.WatchdogThread.ExtraThreadTime.Normal");
}
void GpuWatchdogThreadImplV1::OnForegroundedOnWatchdogThread() {
background_suspend_ref_.reset();
// Records |count| as a sample of the exact-linear histogram
// "GPU.WatchdogThread.ExtraThreadTime.NumOfUsers", using 4 as the histogram
// maximum.
void GpuWatchdogThread::RecordNumOfUsersWaitingWithExtraThreadTimeHistogram(
    int count) {
  static constexpr char kNumOfUsersHistogramName[] =
      "GPU.WatchdogThread.ExtraThreadTime.NumOfUsers";
  constexpr int kHistogramMax = 4;
  base::UmaHistogramExactLinear(kNumOfUsersHistogramName, count,
                                kHistogramMax);
}
void GpuWatchdogThreadImplV1::SuspendStateChanged() {
if (suspension_counter_.HasRefs()) {
suspend_time_ = base::Time::Now();
// When suspending force an acknowledgement to cancel any pending
// termination tasks.
OnAcknowledge();
void GpuWatchdogThread::WatchedThreadNeedsMoreThreadTimeHistogram(
bool no_gpu_hang_detected,
bool start_of_more_thread_time) {
if (start_of_more_thread_time) {
// This is the start of allowing more thread time. Only record it once for
// all following timeouts on the same detected gpu hang, so we know this
// is equivalent to one crash in our crash reports.
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kMoreThreadTime);
RecordNumOfUsersWaitingWithExtraThreadTimeHistogram(0);
} else {
resume_time_ = base::Time::Now();
// After resuming jump-start the watchdog again.
armed_ = false;
OnCheck(true);
if (count_of_more_gpu_thread_time_allowed_ > 0) {
if (no_gpu_hang_detected) {
// If count_of_more_gpu_thread_time_allowed_ > 0, we know extra time was
// extended in the previous OnWatchdogTimeout(). Now we find gpu makes
// progress. Record this case.
GpuWatchdogTimeoutHistogram(
GpuWatchdogTimeoutEvent::kProgressAfterMoreThreadTime);
RecordExtraThreadTimeHistogram();
} else {
if (count_of_more_gpu_thread_time_allowed_ >=
kMaxCountOfMoreGpuThreadTimeAllowed) {
GpuWatchdogTimeoutHistogram(
GpuWatchdogTimeoutEvent::kLessThanFullThreadTimeAfterCapped);
}
}
// Records the number of users who are still waiting. We can use this
// number to calculate the number of users who had already quit.
RecordNumOfUsersWaitingWithExtraThreadTimeHistogram(
count_of_more_gpu_thread_time_allowed_);
// Used by GPU.WatchdogThread.WaitTime later
time_in_wait_for_full_thread_time_ =
count_of_more_gpu_thread_time_allowed_ * watchdog_timeout_;
}
}
}
#endif
void GpuWatchdogThread::WatchedThreadGetsExtraTimeoutHistogram(
bool no_gpu_hang) {
constexpr int kMax = 60;
if (count_of_extra_cycles_ == 0 && !no_gpu_hang) {
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kTimeoutWait);
base::UmaHistogramExactLinear("GPU.WatchdogThread.WaitTime.NumOfUsers", 0,
kMax);
} else if (count_of_extra_cycles_ > 0) {
int count = watchdog_timeout_.InSeconds() * count_of_extra_cycles_;
base::UmaHistogramExactLinear("GPU.WatchdogThread.WaitTime.NumOfUsers",
count, kMax);
if (no_gpu_hang) {
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kProgressAfterWait);
base::UmaHistogramExactLinear(
"GPU.WatchdogThread.WaitTime.ProgressAfterWait", count, kMax);
#if defined(OS_WIN)
base::ThreadTicks GpuWatchdogThreadImplV1::GetWatchedThreadTime() {
if (base::ThreadTicks::IsSupported()) {
// Convert ThreadTicks::Now() to TimeDelta.
return base::ThreadTicks::GetForThread(
base::PlatformThreadHandle(watched_thread_handle_));
} else {
// Use GetThreadTimes as a backup mechanism.
FILETIME creation_time;
FILETIME exit_time;
FILETIME user_time;
FILETIME kernel_time;
BOOL result = GetThreadTimes(watched_thread_handle_, &creation_time,
&exit_time, &kernel_time, &user_time);
DCHECK(result);
ULARGE_INTEGER user_time64;
user_time64.HighPart = user_time.dwHighDateTime;
user_time64.LowPart = user_time.dwLowDateTime;
ULARGE_INTEGER kernel_time64;
kernel_time64.HighPart = kernel_time.dwHighDateTime;
kernel_time64.LowPart = kernel_time.dwLowDateTime;
// Time is reported in units of 100 nanoseconds. Kernel and user time are
// summed to deal with two kinds of hangs. One is where the GPU process is
// stuck in user level, never calling into the kernel and kernel time is
// not increasing. The other is where either the kernel hangs and never
// returns to user level or where user level code
// calls into kernel level repeatedly, giving up its quanta before it is
// tracked, for example a loop that repeatedly Sleeps.
return base::ThreadTicks() +
base::TimeDelta::FromMilliseconds(static_cast<int64_t>(
(user_time64.QuadPart + kernel_time64.QuadPart) / 10000));
// Add the time the GPU thread was given for the full thread time up to 60
// seconds. GPU.WatchdogThread.WaitTime is essentially equal to
// GPU.WatchdogThread.WaitTime.ProgressAfterWait on non-Windows systems.
base::TimeDelta wait_time = base::TimeDelta::FromSeconds(count);
wait_time += time_in_wait_for_full_thread_time_;
constexpr base::TimeDelta kMinTime = base::TimeDelta::FromSeconds(1);
constexpr base::TimeDelta kMaxTime = base::TimeDelta::FromSeconds(150);
constexpr int kBuckets = 50;
// The time the GPU main thread takes to finish a task after a "hang" is
// detected.
base::UmaHistogramCustomTimes("GPU.WatchdogThread.WaitTime", wait_time,
kMinTime, kMaxTime, kBuckets);
#endif
}
}
}
#endif
bool GpuWatchdogThread::WithinOneMinFromPowerResumed() {
size_t count = base::ClampFloor<size_t>(base::TimeDelta::FromMinutes(1) /
watchdog_timeout_);
return power_resumed_event_ && num_of_timeout_after_power_resume_ <= count;
}
bool GpuWatchdogThread::WithinOneMinFromForegrounded() {
size_t count = base::ClampFloor<size_t>(base::TimeDelta::FromMinutes(1) /
watchdog_timeout_);
return foregrounded_event_ && num_of_timeout_after_foregrounded_ <= count;
}
#if defined(USE_X11)
void GpuWatchdogThreadImplV1::UpdateActiveTTY() {
void GpuWatchdogThread::UpdateActiveTTY() {
last_active_tty_ = active_tty_;
active_tty_ = -1;
......@@ -564,7 +885,45 @@ void GpuWatchdogThreadImplV1::UpdateActiveTTY() {
}
#endif
// Constructs the underlying base::Thread with the thread name "GpuWatchdog".
GpuWatchdogThread::GpuWatchdogThread() : base::Thread("GpuWatchdog") {}
// Empty destructor body; this class adds no teardown work of its own here.
GpuWatchdogThread::~GpuWatchdogThread() {}
// Returns true when the watchdog should keep going because the active TTY is
// not the TTY of our host X11 server. Always returns false when USE_X11 is
// not defined, or when either |host_tty_| or |active_tty_| is unknown (-1).
bool GpuWatchdogThread::ContinueOnNonHostX11ServerTty() {
#if defined(USE_X11)
  const bool ttys_known = host_tty_ != -1 && active_tty_ != -1;

  // Don't crash if we're not on the TTY of our host X11 server.
  if (ttys_known && active_tty_ != host_tty_) {
    // The event is recorded when the active TTY equals the previously
    // observed one.
    // NOTE(review): the original comment said "only record for the time there
    // is a change on TTY", which reads like the opposite of this condition;
    // confirm which one is intended.
    const bool same_tty_as_last_check = last_active_tty_ == active_tty_;
    if (same_tty_as_last_check) {
      GpuWatchdogTimeoutHistogram(
          GpuWatchdogTimeoutEvent::kContinueOnNonHostServerTty);
    }
    return true;
  }
#endif
  return false;
}
// Test-only accessor: reports whether |test_result_timeout_and_gpu_hang_| has
// been set, i.e. whether a GPU hang was detected. DCHECKs that the watchdog
// was created in test mode.
bool GpuWatchdogThread::IsGpuHangDetectedForTesting() {
  DCHECK(is_test_mode_);
  const bool hang_detected = test_result_timeout_and_gpu_hang_.IsSet();
  return hang_detected;
}
// This should be called on the test main thread only. It will wait until the
// power observer is added on the watchdog thread.
void GpuWatchdogThread::WaitForPowerObserverAddedForTesting() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
DCHECK(is_add_power_observer_called_);
// Just return if it has been added.
if (is_power_observer_added_)
return;
base::WaitableEvent event;
task_runner()->PostTask(
FROM_HERE,
base::BindOnce(&base::WaitableEvent::Signal, base::Unretained(&event)));
event.Wait();
}
} // namespace gpu
......@@ -66,213 +66,279 @@ enum class GpuWatchdogTimeoutEvent {
kMaxValue = kSlowWatchdogThread,
};
#if defined(OS_WIN)
// If the actual time the watched GPU thread spent doing actual work is less
// than the watchdog timeout, the GPU thread can continue running through
// OnGPUWatchdogTimeout for at most 4 times before the gpu thread is killed.
constexpr int kMaxCountOfMoreGpuThreadTimeAllowed = 3;
#endif
constexpr int kMaxExtraCyclesBeforeKill = 0;
// A thread that intermittently sends tasks to a group of watched message loops
// and deliberately crashes if one of them does not respond after a timeout.
class GPU_IPC_SERVICE_EXPORT GpuWatchdogThread : public base::Thread,
public base::PowerObserver,
public base::TaskObserver,
public gl::ProgressReporter {
public:
static std::unique_ptr<GpuWatchdogThread> Create(bool start_backgrounded);
static std::unique_ptr<GpuWatchdogThread> Create(
bool start_backgrounded,
base::TimeDelta timeout,
int init_factor,
int restart_factor,
int max_extra_cycles_before_kill,
bool test_mode);
~GpuWatchdogThread() override;
// Must be called after a PowerMonitor has been created. Can be called from
// any thread.
virtual void AddPowerObserver() = 0;
void AddPowerObserver();
// Notifies the watchdog when Chrome is backgrounded / foregrounded. Should
// only be used if Chrome is completely backgrounded and not expected to
// render (all windows backgrounded and not producing frames).
virtual void OnBackgrounded() = 0;
virtual void OnForegrounded() = 0;
void OnBackgrounded();
void OnForegrounded();
// The watchdog starts armed to catch startup hangs, and needs to be disarmed
// once init is complete, before executing tasks.
virtual void OnInitComplete() = 0;
void OnInitComplete();
// Notifies the watchdog when the GPU child process is being destroyed.
// This function is called directly from
// viz::GpuServiceImpl::~GpuServiceImpl()
virtual void OnGpuProcessTearDown() = 0;
void OnGpuProcessTearDown();
// Pause the GPU watchdog to stop the timeout task. If the current heavy task
// is not running on the GPU driver, the watchdog can be paused to avoid
// unneeded crash.
virtual void PauseWatchdog() = 0;
void PauseWatchdog();
// Continue the watchdog after a pause.
virtual void ResumeWatchdog() = 0;
void ResumeWatchdog();
// For gpu testing only. Return status for the watchdog tests
virtual bool IsGpuHangDetectedForTesting() = 0;
bool IsGpuHangDetectedForTesting();
virtual void WaitForPowerObserverAddedForTesting() {}
void WaitForPowerObserverAddedForTesting();
protected:
GpuWatchdogThread();
private:
DISALLOW_COPY_AND_ASSIGN(GpuWatchdogThread);
};
// Implements base::Thread.
void Init() override;
void CleanUp() override;
class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV1
: public GpuWatchdogThread {
public:
~GpuWatchdogThreadImplV1() override;
static std::unique_ptr<GpuWatchdogThreadImplV1> Create(
bool start_backgrounded);
// Implements GpuWatchdogThread.
void AddPowerObserver() override;
void OnBackgrounded() override;
void OnForegrounded() override;
void OnInitComplete() override {}
void OnGpuProcessTearDown() override {}
void ResumeWatchdog() override {}
void PauseWatchdog() override {}
bool IsGpuHangDetectedForTesting() override;
// gl::ProgressReporter implementation:
// Implements gl::ProgressReporter.
void ReportProgress() override;
// Implements TaskObserver.
void WillProcessTask(const base::PendingTask& pending_task,
bool was_blocked_or_low_priority) override;
void DidProcessTask(const base::PendingTask& pending_task) override;
// Implements base::PowerObserver.
void OnSuspend() override;
void OnResume() override;
protected:
void Init() override;
void CleanUp() override;
GpuWatchdogThread();
private:
// An object of this type intercepts the reception and completion of all tasks
// on the watched thread and checks whether the watchdog is armed.
class GpuWatchdogTaskObserver : public base::TaskObserver {
public:
explicit GpuWatchdogTaskObserver(GpuWatchdogThreadImplV1* watchdog);
~GpuWatchdogTaskObserver() override;
// Implements TaskObserver.
void WillProcessTask(const base::PendingTask& pending_task,
bool was_blocked_or_low_priority) override;
void DidProcessTask(const base::PendingTask& pending_task) override;
private:
GpuWatchdogThreadImplV1* watchdog_;
enum PauseResumeSource {
kAndroidBackgroundForeground = 0,
kPowerSuspendResume = 1,
kGeneralGpuFlow = 2,
};
// A helper class which allows multiple clients to suspend/resume the
// watchdog thread. As we need to suspend resume on both background /
// foreground events as well as power events, this class manages a ref-count
// of suspend requests.
class SuspensionCounter {
public:
SuspensionCounter(GpuWatchdogThreadImplV1* watchdog_thread);
class SuspensionCounterRef {
public:
explicit SuspensionCounterRef(SuspensionCounter* counter);
~SuspensionCounterRef();
GpuWatchdogThread(base::TimeDelta timeout,
int init_factor,
int restart_factor,
int max_extra_cycles_before_kill,
bool test_mode);
void OnAddPowerObserver();
void RestartWatchdogTimeoutTask(PauseResumeSource source_of_request);
void StopWatchdogTimeoutTask(PauseResumeSource source_of_request);
void UpdateInitializationFlag();
void Arm();
void Disarm();
void InProgress();
bool IsArmed();
base::subtle::Atomic32 ReadArmDisarmCounter();
void OnWatchdogTimeout();
bool SlowWatchdogThread();
bool WatchedThreadNeedsMoreThreadTime(bool no_gpu_hang_detected);
#if defined(OS_WIN)
base::ThreadTicks GetWatchedThreadTime();
#endif
bool WatchedThreadGetsExtraTimeout(bool no_gpu_hang);
private:
SuspensionCounter* counter_;
};
// Do not change the function name. It is used for [GPU HANG] crash reports.
void DeliberatelyTerminateToRecoverFromHang();
// This class must outlive SuspensionCounterRefs.
std::unique_ptr<SuspensionCounterRef> Take();
// Records "GPU.WatchdogThread.Event".
void GpuWatchdogHistogram(GpuWatchdogThreadEvent thread_event);
// Used to update the |watchdog_thread_sequence_checker_|.
void OnWatchdogThreadStopped();
// Histogram recorded in OnWatchdogTimeout()
// Records "GPU.WatchdogThread.Timeout"
void GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent timeout_event);
bool HasRefs() const;
#if defined(OS_WIN)
// The extra thread time the GPU main thread needs to make a progress.
// Records "GPU.WatchdogThread.ExtraThreadTime".
void RecordExtraThreadTimeHistogram();
// The number of users per timeout stay in Chrome after giving extra thread
// time. Records "GPU.WatchdogThread.ExtraThreadTime.NumOfUsers" and
// "GPU.WatchdogThread.Timeout".
void RecordNumOfUsersWaitingWithExtraThreadTimeHistogram(int count);
// Histograms recorded for WatchedThreadNeedsMoreThreadTime() function.
void WatchedThreadNeedsMoreThreadTimeHistogram(
bool no_gpu_hang_detected,
bool start_of_more_thread_time);
#endif
private:
void OnAddRef();
void OnReleaseRef();
GpuWatchdogThreadImplV1* watchdog_thread_;
uint32_t suspend_count_ = 0;
// The number of users stay in Chrome after the extra timeout wait cycles.
// Records "GPU.WatchdogThread.WaitTime.ProgressAfterWait",
// "GPU.WatchdogThread.WaitTime.NumOfUsers" and "GPU.WatchdogThread.Timeout".
void WatchedThreadGetsExtraTimeoutHistogram(bool no_gpu_hang);
SEQUENCE_CHECKER(watchdog_thread_sequence_checker_);
};
GpuWatchdogThreadImplV1();
// Used for metrics. It's 1 minute after the event.
bool WithinOneMinFromPowerResumed();
bool WithinOneMinFromForegrounded();
void CheckArmed();
#if defined(USE_X11)
void UpdateActiveTTY();
#endif
// The watchdog continues when it's not on the TTY of our host X11 server.
bool ContinueOnNonHostX11ServerTty();
void OnAcknowledge();
void OnCheck(bool after_suspend);
void OnCheckTimeout();
// Do not change the function name. It is used for [GPU HANG] crash reports.
void DeliberatelyTerminateToRecoverFromHang();
// This counter is only written on the gpu thread, and read on both threads.
volatile base::subtle::Atomic32 arm_disarm_counter_ = 0;
// The counter number read in the last OnWatchdogTimeout() on the watchdog
// thread.
int32_t last_arm_disarm_counter_ = 0;
void OnAddPowerObserver();
// Timeout on the watchdog thread to check if gpu hangs.
base::TimeDelta watchdog_timeout_;
// Implement PowerObserver.
void OnSuspend() override;
void OnResume() override;
// The one-time watchdog timeout multiplier in the gpu initialization.
int watchdog_init_factor_;
// Handle background/foreground.
void OnBackgroundedOnWatchdogThread();
void OnForegroundedOnWatchdogThread();
// The one-time watchdog timeout multiplier after the watchdog pauses and
// restarts.
int watchdog_restart_factor_;
void SuspendStateChanged();
// The time the gpu watchdog was created.
base::TimeTicks watchdog_start_timeticks_;
#if defined(OS_WIN)
base::ThreadTicks GetWatchedThreadTime();
#endif
// The time the last OnSuspend and OnResume was called.
base::TimeTicks power_suspend_timeticks_;
base::TimeTicks power_resume_timeticks_;
#if defined(USE_X11)
void UpdateActiveTTY();
#endif
// The time the last OnBackgrounded and OnForegrounded was called.
base::TimeTicks backgrounded_timeticks_;
base::TimeTicks foregrounded_timeticks_;
scoped_refptr<base::SingleThreadTaskRunner> watched_task_runner_;
base::TimeDelta timeout_;
bool armed_;
GpuWatchdogTaskObserver task_observer_;
// The time PauseWatchdog and ResumeWatchdog was called.
base::TimeTicks watchdog_pause_timeticks_;
base::TimeTicks watchdog_resume_timeticks_;
// |awaiting_acknowledge_| is only ever read on the watched thread, but may
// be modified on either the watched or watchdog thread. Reads/writes should
// be careful to ensure that appropriate synchronization is used.
base::subtle::Atomic32 awaiting_acknowledge_;
// TimeTicks: Tracking the amount of time a task runs. Executing delayed
// tasks at the right time.
// ThreadTicks: Use this timer to (approximately) measure how much time the
// calling thread spent doing actual work vs. being de-scheduled.
// True if the watchdog should wait for a certain amount of CPU to be used
// before killing the process.
bool use_thread_cpu_time_;
// The time the last OnWatchdogTimeout() was called.
base::TimeTicks last_on_watchdog_timeout_timeticks_;
// The number of consecutive acknowledgements that had a latency less than
// 50ms.
int responsive_acknowledge_count_;
// The wall-clock time the next OnWatchdogTimeout() will be called.
base::Time next_on_watchdog_timeout_time_;
#if defined(OS_WIN)
void* watched_thread_handle_;
base::ThreadTicks arm_cpu_time_;
base::ThreadTicks last_on_watchdog_timeout_thread_ticks_;
// This measures the time that the system has been running, in units of 100
// ns.
ULONGLONG arm_interrupt_time_;
#endif
// The difference between the timeout and the actual time the watched thread
// spent doing actual work.
base::TimeDelta remaining_watched_thread_ticks_;
// Time after which it's assumed that the computer has been suspended since
// the task was posted.
base::Time suspension_timeout_;
// The Windows thread handle of the watched GPU main thread.
void* watched_thread_handle_ = nullptr;
SuspensionCounter suspension_counter_;
std::unique_ptr<SuspensionCounter::SuspensionCounterRef> power_suspend_ref_;
std::unique_ptr<SuspensionCounter::SuspensionCounterRef>
background_suspend_ref_;
// After GPU hang detected, how many times has the GPU thread been allowed to
// continue due to not enough thread time.
int count_of_more_gpu_thread_time_allowed_ = 0;
// The time the last OnSuspend and OnResume was called.
base::Time suspend_time_;
base::Time resume_time_;
// The total timeout, up to 60 seconds, the watchdog thread waits for the GPU
// main thread to get full thread time.
base::TimeDelta time_in_wait_for_full_thread_time_;
// This is the time the last check was sent.
base::Time check_time_;
base::TimeTicks check_timeticks_;
// After detecting GPU hang and continuing running through
// OnGpuWatchdogTimeout for the max cycles, the GPU main thread still cannot
// get the full thread time.
bool less_than_full_thread_time_after_capped_ = false;
#endif
#if defined(USE_X11)
FILE* tty_file_;
int host_tty_;
FILE* tty_file_ = nullptr;
int host_tty_ = -1;
int active_tty_ = -1;
int last_active_tty_ = -1;
#endif
base::WeakPtrFactory<GpuWatchdogThreadImplV1> weak_factory_{this};
// The system has entered the power suspension mode.
bool in_power_suspension_ = false;
// The GPU process has started tearing down. Accessed only in the gpu process.
bool in_gpu_process_teardown_ = false;
// Chrome is running on the background on Android. Gpu is probably very slow
// or stalled.
bool is_backgrounded_ = false;
// The GPU watchdog is paused. The timeout task is temporarily stopped.
bool is_paused_ = false;
// Whether the watchdog thread has been called and added to the power monitor
// observer.
bool is_add_power_observer_called_ = false;
bool is_power_observer_added_ = false;
// whether GpuWatchdogThreadEvent::kGpuWatchdogStart has been recorded.
bool is_watchdog_start_histogram_recorded = false;
// Read/Write by the watchdog thread only after initialized in the
// constructor.
bool in_gpu_initialization_ = false;
DISALLOW_COPY_AND_ASSIGN(GpuWatchdogThreadImplV1);
// The number of logical processors/cores on the current machine.
int num_of_processors_ = 0;
// Don't kill the GPU process immediately after a gpu hang is detected. Wait
// for extra cycles of timeout. Kill it, if the GPU still doesn't respond
// after wait.
const int max_extra_cycles_before_kill_;
// how many cycles of timeout since we detect a hang.
int count_of_extra_cycles_ = 0;
// For the experiment and the debugging purpose
size_t num_of_timeout_after_power_resume_ = 0;
size_t num_of_timeout_after_foregrounded_ = 0;
bool foregrounded_event_ = false;
bool power_resumed_event_ = false;
// For gpu testing only.
const bool is_test_mode_;
// Set by the watchdog thread and Read by the test thread.
base::AtomicFlag test_result_timeout_and_gpu_hang_;
scoped_refptr<base::SingleThreadTaskRunner> watched_gpu_task_runner_;
scoped_refptr<base::SingleThreadTaskRunner> watchdog_thread_task_runner_;
base::WeakPtr<GpuWatchdogThread> weak_ptr_;
base::WeakPtrFactory<GpuWatchdogThread> weak_factory_{this};
DISALLOW_COPY_AND_ASSIGN(GpuWatchdogThread);
};
} // namespace gpu
#endif // GPU_IPC_SERVICE_GPU_WATCHDOG_THREAD_H_
......@@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "gpu/ipc/service/gpu_watchdog_thread.h"
#include "base/test/task_environment.h"
#include "gpu/ipc/service/gpu_watchdog_thread_v2.h"
#include "base/power_monitor/power_monitor.h"
#include "base/power_monitor/power_monitor_source.h"
......@@ -70,7 +70,7 @@ void GpuWatchdogTest::SetUp() {
ASSERT_TRUE(base::CurrentThread::IsSet());
// Set watchdog timeout to 1000 milliseconds
watchdog_thread_ = gpu::GpuWatchdogThreadImplV2::Create(
watchdog_thread_ = gpu::GpuWatchdogThread::Create(
/*start_backgrounded*/ false,
/*timeout*/ kGpuWatchdogTimeoutForTesting,
/*init_factor*/ kInitFactor,
......
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "gpu/ipc/service/gpu_watchdog_thread_v2.h"
#include "base/atomicops.h"
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/bit_cast.h"
#include "base/debug/alias.h"
#include "base/debug/dump_without_crashing.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/memory/ptr_util.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_functions.h"
#include "base/native_library.h"
#include "base/numerics/safe_conversions.h"
#include "base/power_monitor/power_monitor.h"
#include "base/process/process.h"
#include "base/strings/string_number_conversions.h"
#include "base/system/sys_info.h"
#include "base/task/current_thread.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "gpu/config/gpu_crash_keys.h"
#include "gpu/config/gpu_finch_features.h"
#include "gpu/ipc/common/result_codes.h"
#if defined(OS_WIN)
#include "base/win/windows_version.h"
#endif
namespace gpu {
#if defined(OS_WIN)
// Returns the watchdog timeout adjusted for the number of CPU cores. Only
// Windows 10 and later are adjusted: machines with more than 8 processors get
// 10 seconds less than kGpuWatchdogTimeout, machines with 4 or fewer get 5
// seconds more. Everything else uses kGpuWatchdogTimeout unchanged.
base::TimeDelta GetGpuWatchdogTimeoutBasedOnCpuCores() {
  // Older Windows versions always use the default timeout.
  if (base::win::GetVersion() < base::win::Version::WIN10)
    return kGpuWatchdogTimeout;

  const int core_count = base::SysInfo::NumberOfProcessors();
  if (core_count > 8)
    return kGpuWatchdogTimeout - base::TimeDelta::FromSeconds(10);
  if (core_count <= 4)
    return kGpuWatchdogTimeout + base::TimeDelta::FromSeconds(5);

  // 5 to 8 processors: keep the default.
  return kGpuWatchdogTimeout;
}
#endif
// Constructs the watchdog on the thread that is going to be watched: the
// current thread's task runner is captured as |watched_gpu_task_runner_| and
// this object registers itself as a task observer of the current thread.
//
// |timeout| is the base period between OnWatchdogTimeout() checks.
// |init_factor| and |restart_factor| are stored as multipliers of |timeout|
// that RestartWatchdogTimeoutTask() applies to the first check after a
// restart. |max_extra_cycles_before_kill| caps the extra timeout cycles
// granted by WatchedThreadGetsExtraTimeout(). |is_test_mode| makes
// DeliberatelyTerminateToRecoverFromHang() set a flag instead of terminating
// the process.
GpuWatchdogThreadImplV2::GpuWatchdogThreadImplV2(
base::TimeDelta timeout,
int init_factor,
int restart_factor,
int max_extra_cycles_before_kill,
bool is_test_mode)
: watchdog_timeout_(timeout),
watchdog_init_factor_(init_factor),
watchdog_restart_factor_(restart_factor),
in_gpu_initialization_(true),
max_extra_cycles_before_kill_(max_extra_cycles_before_kill),
is_test_mode_(is_test_mode),
watched_gpu_task_runner_(base::ThreadTaskRunnerHandle::Get()) {
// Observe every task on the watched thread so that WillProcessTask() and
// DidProcessTask() can arm and disarm the watchdog around it.
base::CurrentThread::Get()->AddTaskObserver(this);
// Cached here; reported as a crash key if the watchdog kills the process.
num_of_processors_ = base::SysInfo::NumberOfProcessors();
#if defined(OS_WIN)
// GetCurrentThread returns a pseudo-handle that cannot be used by one thread
// to identify another. DuplicateHandle creates a "real" handle that can be
// used for this purpose.
if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
GetCurrentProcess(), &watched_thread_handle_,
THREAD_QUERY_INFORMATION, FALSE, 0)) {
// Without a handle the thread-time based checks are skipped (see the
// |watched_thread_handle_| tests elsewhere in this file).
watched_thread_handle_ = nullptr;
}
#endif
#if defined(USE_X11)
// Remember which TTY was active at construction time; it is treated as the
// host TTY by ContinueOnNonHostX11ServerTty().
tty_file_ = base::OpenFile(
base::FilePath(FILE_PATH_LITERAL("/sys/class/tty/tty0/active")), "r");
UpdateActiveTTY();
host_tty_ = active_tty_;
#endif
// The watchdog starts out armed; OnInitComplete() disarms it.
Arm();
}
// Must run on the watched gpu thread. Disarms and pauses the watchdog, stops
// the watchdog thread, unregisters the observers added during the object's
// lifetime and releases the platform resources acquired in the constructor.
GpuWatchdogThreadImplV2::~GpuWatchdogThreadImplV2() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
// Stop() might take too long and the watchdog timeout is triggered.
// Disarm first before calling Stop() to avoid a crash.
if (IsArmed())
Disarm();
// Posts a task to the watchdog thread that revokes any pending timeout task.
PauseWatchdog();
Stop(); // stop the watchdog thread
base::CurrentThread::Get()->RemoveTaskObserver(this);
base::PowerMonitor::RemoveObserver(this);
GpuWatchdogHistogram(GpuWatchdogThreadEvent::kGpuWatchdogEnd);
#if defined(OS_WIN)
// Close the duplicated thread handle, if the duplication succeeded.
if (watched_thread_handle_)
CloseHandle(watched_thread_handle_);
#endif
#if defined(USE_X11)
// Close the TTY status file, if it could be opened.
if (tty_file_)
fclose(tty_file_);
#endif
}
// static
std::unique_ptr<GpuWatchdogThreadImplV2> GpuWatchdogThreadImplV2::Create(
bool start_backgrounded,
base::TimeDelta timeout,
int init_factor,
int restart_factor,
int max_extra_cycles_before_kill,
bool is_test_mode) {
auto watchdog_thread = base::WrapUnique(
new GpuWatchdogThreadImplV2(timeout, init_factor, restart_factor,
max_extra_cycles_before_kill, is_test_mode));
base::Thread::Options options;
options.timer_slack = base::TIMER_SLACK_MAXIMUM;
watchdog_thread->StartWithOptions(options);
if (start_backgrounded)
watchdog_thread->OnBackgrounded();
return watchdog_thread;
}
// static
// Creates a production (non-test) watchdog using the default timeout, factors
// and extra-cycle count. When the kGpuWatchdogV2NewTimeout feature is enabled,
// the timeout and the extra-cycle count are read from the feature's field
// trial parameters, falling back to the per-platform values selected below.
std::unique_ptr<GpuWatchdogThreadImplV2> GpuWatchdogThreadImplV2::Create(
bool start_backgrounded) {
base::TimeDelta gpu_watchdog_timeout = kGpuWatchdogTimeout;
int init_factor = kInitFactor;
int restart_factor = kRestartFactor;
int max_extra_cycles_before_kill = kMaxExtraCyclesBeforeKill;
if (base::FeatureList::IsEnabled(features::kGpuWatchdogV2NewTimeout)) {
// Names of the field trial parameters read below.
const char kNewTimeOutParam[] = "new_time_out";
const char kMaxExtraCyclesBeforeKillParam[] =
"max_extra_cycles_before_kill";
#if defined(OS_WIN)
// The purpose of finch on Windows is to know the impact of the number of
// CPU cores while the rest of platforms are to try a different watchdog
// timeout length.
gpu_watchdog_timeout = GetGpuWatchdogTimeoutBasedOnCpuCores();
constexpr int kFinchMaxExtraCyclesBeforeKill = 0;
#elif defined(OS_ANDROID)
constexpr int kFinchMaxExtraCyclesBeforeKill = 0;
// Android additionally switches to the finch init/restart factors.
init_factor = kInitFactorFinch;
restart_factor = kRestartFactorFinch;
#elif defined(OS_MAC)
constexpr int kFinchMaxExtraCyclesBeforeKill = 1;
#else
constexpr int kFinchMaxExtraCyclesBeforeKill = 2;
#endif
// The timeout parameter is expressed in whole seconds; the current timeout
// is used when the parameter is absent.
int timeout = base::GetFieldTrialParamByFeatureAsInt(
features::kGpuWatchdogV2NewTimeout, kNewTimeOutParam,
gpu_watchdog_timeout.InSeconds());
gpu_watchdog_timeout = base::TimeDelta::FromSeconds(timeout);
max_extra_cycles_before_kill = base::GetFieldTrialParamByFeatureAsInt(
features::kGpuWatchdogV2NewTimeout, kMaxExtraCyclesBeforeKillParam,
kFinchMaxExtraCyclesBeforeKill);
}
// The trailing false is |is_test_mode|.
return Create(start_backgrounded, gpu_watchdog_timeout, init_factor,
restart_factor, max_extra_cycles_before_kill, false);
}
// Called on the gpu thread once PowerMonitor is available. Do not call this
// during watchdog init, PowerMonitor might not be up and running yet.
void GpuWatchdogThreadImplV2::AddPowerObserver() {
  DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());

  is_add_power_observer_called_ = true;

  // PowerMonitor::AddObserver is invoked on the watchdog thread so that
  // OnSuspend() and OnResume() are delivered on the watchdog thread as well.
  auto watchdog_runner = task_runner();
  watchdog_runner->PostTask(
      FROM_HERE, base::BindOnce(&GpuWatchdogThreadImplV2::OnAddPowerObserver,
                                base::Unretained(this)));
}
// Android Chrome goes to the background. Called from the gpu thread.
void GpuWatchdogThreadImplV2::OnBackgrounded() {
  // StopWatchdogTimeoutTask() has to run on the watchdog thread, so the
  // request is posted to that thread's task runner.
  auto watchdog_runner = task_runner();
  watchdog_runner->PostTask(
      FROM_HERE,
      base::BindOnce(&GpuWatchdogThreadImplV2::StopWatchdogTimeoutTask,
                     base::Unretained(this), kAndroidBackgroundForeground));
}
// Android Chrome goes to the foreground. Called from the gpu thread.
void GpuWatchdogThreadImplV2::OnForegrounded() {
  // RestartWatchdogTimeoutTask() has to run on the watchdog thread, so the
  // request is posted to that thread's task runner.
  auto watchdog_runner = task_runner();
  watchdog_runner->PostTask(
      FROM_HERE,
      base::BindOnce(&GpuWatchdogThreadImplV2::RestartWatchdogTimeoutTask,
                     base::Unretained(this), kAndroidBackgroundForeground));
}
// Called from the gpu thread when gpu init has completed.
void GpuWatchdogThreadImplV2::OnInitComplete() {
  DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());

  // The initialization flag is cleared on the watchdog thread.
  auto watchdog_runner = task_runner();
  watchdog_runner->PostTask(
      FROM_HERE,
      base::BindOnce(&GpuWatchdogThreadImplV2::UpdateInitializationFlag,
                     base::Unretained(this)));

  // Pairs with the Arm() issued at the end of the constructor.
  Disarm();
}
// Called from the gpu thread in viz::GpuServiceImpl::~GpuServiceImpl().
// From this point on no Disarm() is issued until the watchdog thread is
// destroyed, so a teardown that takes too long triggers the watchdog timeout.
void GpuWatchdogThreadImplV2::OnGpuProcessTearDown() {
  DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());

  in_gpu_process_teardown_ = true;

  // Make sure the watchdog is armed exactly once for the teardown.
  if (IsArmed())
    return;
  Arm();
}
// Called from the gpu main thread. Asks the watchdog thread to stop the
// timeout task on behalf of the general gpu flow.
void GpuWatchdogThreadImplV2::PauseWatchdog() {
  DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());

  auto watchdog_runner = task_runner();
  watchdog_runner->PostTask(
      FROM_HERE,
      base::BindOnce(&GpuWatchdogThreadImplV2::StopWatchdogTimeoutTask,
                     base::Unretained(this), kGeneralGpuFlow));
}
// Called from the gpu main thread. Asks the watchdog thread to restart the
// timeout task on behalf of the general gpu flow.
void GpuWatchdogThreadImplV2::ResumeWatchdog() {
  DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());

  auto watchdog_runner = task_runner();
  watchdog_runner->PostTask(
      FROM_HERE,
      base::BindOnce(&GpuWatchdogThreadImplV2::RestartWatchdogTimeoutTask,
                     base::Unretained(this), kGeneralGpuFlow));
}
// Running on the watchdog thread.
// On Linux, Init() will be called twice for Sandbox Initialization. The
// watchdog is stopped and then restarted in StartSandboxLinux(). Everything
// should be the same and continue after the second init().
void GpuWatchdogThreadImplV2::Init() {
watchdog_thread_task_runner_ = base::ThreadTaskRunnerHandle::Get();
// Get and Invalidate weak_ptr should be done on the watchdog thread only.
weak_ptr_ = weak_factory_.GetWeakPtr();
base::TimeDelta timeout = watchdog_timeout_ * kInitFactor;
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV2::OnWatchdogTimeout, weak_ptr_),
timeout);
last_arm_disarm_counter_ = ReadArmDisarmCounter();
watchdog_start_timeticks_ = base::TimeTicks::Now();
last_on_watchdog_timeout_timeticks_ = watchdog_start_timeticks_;
next_on_watchdog_timeout_time_ = base::Time::Now() + timeout;
#if defined(OS_WIN)
if (watched_thread_handle_) {
if (base::ThreadTicks::IsSupported())
base::ThreadTicks::WaitUntilInitialized();
last_on_watchdog_timeout_thread_ticks_ = GetWatchedThreadTime();
remaining_watched_thread_ticks_ = timeout;
}
#endif
}
// Running on the watchdog thread.
// Invalidates the weak pointers handed to pending OnWatchdogTimeout() tasks so
// that none of them runs after this point.
void GpuWatchdogThreadImplV2::CleanUp() {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
weak_factory_.InvalidateWeakPtrs();
}
// Called on the watched gpu thread to signal that it is still making
// progress. Forwards to InProgress(), which advances the arm/disarm counter
// while leaving the watchdog armed.
void GpuWatchdogThreadImplV2::ReportProgress() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
InProgress();
}
// TaskObserver hook that runs on the watched gpu thread before each task.
// Arms the watchdog for the duration of the task.
void GpuWatchdogThreadImplV2::WillProcessTask(
    const base::PendingTask& pending_task,
    bool was_blocked_or_low_priority) {
  DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());

  if (!in_gpu_process_teardown_) {
    Arm();
    return;
  }

  // The watchdog is armed at the beginning of the gpu process teardown and
  // stays armed, so Arm() must not be called again here.
  DCHECK(IsArmed());
}
// TaskObserver hook that runs on the watched gpu thread after each task.
// Disarms the watchdog, except during teardown where it only reports
// progress.
void GpuWatchdogThreadImplV2::DidProcessTask(
    const base::PendingTask& pending_task) {
  DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());

  if (in_gpu_process_teardown_) {
    // Keep the watchdog armed during tear down.
    InProgress();
    return;
  }

  Disarm();
}
// Power Suspends. Running on the watchdog thread.
// Stops the timeout task with kPowerSuspendResume as the source of the
// request.
void GpuWatchdogThreadImplV2::OnSuspend() {
StopWatchdogTimeoutTask(kPowerSuspendResume);
}
// Power Resumes. Running on the watchdog thread.
// Restarts the timeout task with kPowerSuspendResume as the source of the
// request.
void GpuWatchdogThreadImplV2::OnResume() {
RestartWatchdogTimeoutTask(kPowerSuspendResume);
}
// Running on the watchdog thread.
// Registers this object with PowerMonitor and records that the registration
// has happened. Posted from AddPowerObserver().
void GpuWatchdogThreadImplV2::OnAddPowerObserver() {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
DCHECK(base::PowerMonitor::IsInitialized());
base::PowerMonitor::AddObserver(this);
// Read by WaitForPowerObserverAddedForTesting() and aliased into crash dumps.
is_power_observer_added_ = true;
}
// Running on the watchdog thread.
// Clears the stop reason identified by |source_of_request|. If that reason was
// not set, nothing happens. Once no stop reason (backgrounded, power
// suspension, paused) remains, a new OnWatchdogTimeout() task is scheduled and
// the bookkeeping used by the timeout handler is reset.
void GpuWatchdogThreadImplV2::RestartWatchdogTimeoutTask(
PauseResumeSource source_of_request) {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
// Delay of the first timeout after the restart; chosen per source below.
base::TimeDelta timeout;
switch (source_of_request) {
case kAndroidBackgroundForeground:
if (!is_backgrounded_)
return;
is_backgrounded_ = false;
timeout = watchdog_timeout_ * watchdog_restart_factor_;
foregrounded_timeticks_ = base::TimeTicks::Now();
// Start counting timeouts since foregrounding (used for metrics).
foregrounded_event_ = true;
num_of_timeout_after_foregrounded_ = 0;
break;
case kPowerSuspendResume:
if (!in_power_suspension_)
return;
in_power_suspension_ = false;
timeout = watchdog_timeout_ * watchdog_restart_factor_;
power_resume_timeticks_ = base::TimeTicks::Now();
// Start counting timeouts since power resume (used for metrics).
power_resumed_event_ = true;
num_of_timeout_after_power_resume_ = 0;
break;
case kGeneralGpuFlow:
if (!is_paused_)
return;
is_paused_ = false;
// A general resume uses the init factor rather than the restart factor.
timeout = watchdog_timeout_ * watchdog_init_factor_;
watchdog_resume_timeticks_ = base::TimeTicks::Now();
break;
}
// Only restart when every stop reason has been cleared.
if (!is_backgrounded_ && !in_power_suspension_ && !is_paused_) {
// StopWatchdogTimeoutTask() invalidated the previous weak pointers, so a
// fresh one is needed for the new task.
weak_ptr_ = weak_factory_.GetWeakPtr();
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV2::OnWatchdogTimeout, weak_ptr_),
timeout);
last_on_watchdog_timeout_timeticks_ = base::TimeTicks::Now();
next_on_watchdog_timeout_time_ = base::Time::Now() + timeout;
last_arm_disarm_counter_ = ReadArmDisarmCounter();
#if defined(OS_WIN)
if (watched_thread_handle_) {
last_on_watchdog_timeout_thread_ticks_ = GetWatchedThreadTime();
remaining_watched_thread_ticks_ = timeout;
}
#endif
}
}
// Running on the watchdog thread.
// Sets the stop reason identified by |source_of_request| and revokes any
// pending OnWatchdogTimeout() task. If that reason is already set, nothing
// happens.
void GpuWatchdogThreadImplV2::StopWatchdogTimeoutTask(
PauseResumeSource source_of_request) {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
switch (source_of_request) {
case kAndroidBackgroundForeground:
if (is_backgrounded_)
return;
is_backgrounded_ = true;
backgrounded_timeticks_ = base::TimeTicks::Now();
// Stops WithinOneMinFromForegrounded() from reporting true.
foregrounded_event_ = false;
break;
case kPowerSuspendResume:
if (in_power_suspension_)
return;
in_power_suspension_ = true;
power_suspend_timeticks_ = base::TimeTicks::Now();
// Stops WithinOneMinFromPowerResumed() from reporting true.
power_resumed_event_ = false;
break;
case kGeneralGpuFlow:
if (is_paused_)
return;
is_paused_ = true;
watchdog_pause_timeticks_ = base::TimeTicks::Now();
break;
}
// Revoke any pending watchdog timeout task
weak_factory_.InvalidateWeakPtrs();
}
// Marks gpu initialization as finished. Posted to the watchdog thread by
// OnInitComplete(); the flag selects the ".Init" histogram variants and is
// reported as a crash key.
void GpuWatchdogThreadImplV2::UpdateInitializationFlag() {
in_gpu_initialization_ = false;
}
// Called from the gpu main thread.
// The watchdog is armed only in these three functions -
// GpuWatchdogThreadImplV2(), WillProcessTask(), and OnGpuProcessTearDown()
// Arming increments |arm_disarm_counter_| by one, taking it from an even
// (disarmed) to an odd (armed) value.
void GpuWatchdogThreadImplV2::Arm() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
base::subtle::NoBarrier_AtomicIncrement(&arm_disarm_counter_, 1);
// Arm/Disarm are always called in sequence. Now it's an odd number.
DCHECK(IsArmed());
}
// Called from the gpu main thread. Disarming increments |arm_disarm_counter_|
// by one, taking it from an odd (armed) to an even (disarmed) value.
void GpuWatchdogThreadImplV2::Disarm() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
base::subtle::NoBarrier_AtomicIncrement(&arm_disarm_counter_, 1);
// Arm/Disarm are always called in sequence. Now it's an even number.
DCHECK(!IsArmed());
}
// Called from the gpu main thread while the watchdog is armed. Changes the
// counter value, which OnWatchdogTimeout() interprets as progress, without
// leaving the armed state.
void GpuWatchdogThreadImplV2::InProgress() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
// Increment by 2. This is equivalent to Disarm() + Arm().
base::subtle::NoBarrier_AtomicIncrement(&arm_disarm_counter_, 2);
// Now it's an odd number.
DCHECK(IsArmed());
}
bool GpuWatchdogThreadImplV2::IsArmed() {
// It's an odd number.
return base::subtle::NoBarrier_Load(&arm_disarm_counter_) & 1;
}
// Returns the current value of |arm_disarm_counter_| using a no-barrier
// atomic load.
base::subtle::Atomic32 GpuWatchdogThreadImplV2::ReadArmDisarmCounter() {
return base::subtle::NoBarrier_Load(&arm_disarm_counter_);
}
// Running on the watchdog thread.
// The periodic hang check. Decides from the arm/disarm counter and a series of
// grace conditions whether the watched thread is considered hung. If not, the
// next check is scheduled; otherwise the process is deliberately terminated.
void GpuWatchdogThreadImplV2::OnWatchdogTimeout() {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
DCHECK(!is_backgrounded_);
DCHECK(!in_power_suspension_);
DCHECK(!is_paused_);
// If this metric is added too early (eg. watchdog creation time), it cannot
// be persistent. The histogram data will be lost after crash or browser exit.
// Delay the recording of kGpuWatchdogStart until the first
// OnWatchdogTimeout() to ensure this metric is created in the persistent
// memory.
if (!is_watchdog_start_histogram_recorded) {
is_watchdog_start_histogram_recorded = true;
GpuWatchdogHistogram(GpuWatchdogThreadEvent::kGpuWatchdogStart);
}
// Snapshot the counter once; all decisions below use this value.
auto arm_disarm_counter = ReadArmDisarmCounter();
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kTimeout);
// Count timeouts since the last power resume / foregrounding; used by the
// WithinOneMinFrom*() metric helpers.
if (power_resumed_event_)
num_of_timeout_after_power_resume_++;
if (foregrounded_event_)
num_of_timeout_after_foregrounded_++;
#if defined(USE_X11)
UpdateActiveTTY();
#endif
// Collect all needed info for gpu hang detection.
bool disarmed = arm_disarm_counter % 2 == 0; // even number
bool gpu_makes_progress = arm_disarm_counter != last_arm_disarm_counter_;
// SlowWatchdogThread() is only evaluated (and only records its histogram)
// when the watchdog is armed and no progress was made.
bool no_gpu_hang = disarmed || gpu_makes_progress || SlowWatchdogThread();
// The helpers below are called unconditionally because they also update
// their own counters and record histograms.
bool watched_thread_needs_more_time =
WatchedThreadNeedsMoreThreadTime(no_gpu_hang);
no_gpu_hang = no_gpu_hang || watched_thread_needs_more_time ||
ContinueOnNonHostX11ServerTty();
bool allows_extra_timeout = WatchedThreadGetsExtraTimeout(no_gpu_hang);
no_gpu_hang = no_gpu_hang || allows_extra_timeout;
// No gpu hang. Continue with another OnWatchdogTimeout task.
if (no_gpu_hang) {
last_on_watchdog_timeout_timeticks_ = base::TimeTicks::Now();
next_on_watchdog_timeout_time_ = base::Time::Now() + watchdog_timeout_;
last_arm_disarm_counter_ = ReadArmDisarmCounter();
task_runner()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&GpuWatchdogThreadImplV2::OnWatchdogTimeout, weak_ptr_),
watchdog_timeout_);
return;
}
// Still armed without any progress. GPU possibly hangs.
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kKill);
#if defined(OS_WIN)
if (less_than_full_thread_time_after_capped_)
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kKillOnLessThreadTime);
#endif
DeliberatelyTerminateToRecoverFromHang();
}
bool GpuWatchdogThreadImplV2::SlowWatchdogThread() {
// If it takes 15 more seconds than the expected time between two
// OnWatchdogTimeout() calls, the system is considered slow and it's not a GPU
// hang.
bool slow_watchdog_thread =
(base::Time::Now() - next_on_watchdog_timeout_time_) >=
base::TimeDelta::FromSeconds(15);
// Record this case only when a GPU hang is detected and the thread is slow.
if (slow_watchdog_thread)
GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kSlowWatchdogThread);
return slow_watchdog_thread;
}
// Windows only; always returns false on other platforms or without a watched
// thread handle. Returns true when a hang was detected
// (|no_gpu_hang_detected| is false) but the watched thread has not yet
// consumed its thread-time budget, in which case it is granted another
// timeout, up to kMaxCountOfMoreGpuThreadTimeAllowed times in a row.
bool GpuWatchdogThreadImplV2::WatchedThreadNeedsMoreThreadTime(
bool no_gpu_hang_detected) {
#if defined(OS_WIN)
if (!watched_thread_handle_)
return false;
// We allow extra thread time. When that runs out, we extend extra timeout
// cycles. Now, we are extending extra timeout cycles. Don't add extra thread
// time.
if (count_of_extra_cycles_ > 0)
return false;
// Record metrics first; this reads the extension count before it is updated
// below.
WatchedThreadNeedsMoreThreadTimeHistogram(
no_gpu_hang_detected,
/*start_of_more_thread_time*/ false);
// Remember whether the hang persists after the maximum number of extensions
// has been used; OnWatchdogTimeout() records this when it kills.
if (!no_gpu_hang_detected && count_of_more_gpu_thread_time_allowed_ >=
kMaxCountOfMoreGpuThreadTimeAllowed) {
less_than_full_thread_time_after_capped_ = true;
} else {
less_than_full_thread_time_after_capped_ = false;
}
// Calculate how many thread ticks the watched thread spent doing the work.
base::ThreadTicks now = GetWatchedThreadTime();
base::TimeDelta thread_time_elapsed =
now - last_on_watchdog_timeout_thread_ticks_;
last_on_watchdog_timeout_thread_ticks_ = now;
remaining_watched_thread_ticks_ -= thread_time_elapsed;
// No extension when there is no hang, the extension cap is reached, the
// measurement is bogus, or the thread-time budget is used up.
if (no_gpu_hang_detected ||
count_of_more_gpu_thread_time_allowed_ >=
kMaxCountOfMoreGpuThreadTimeAllowed ||
thread_time_elapsed < base::TimeDelta() /* bogus data */ ||
remaining_watched_thread_ticks_ <= base::TimeDelta()) {
// Reset the remaining thread ticks.
remaining_watched_thread_ticks_ = watchdog_timeout_;
count_of_more_gpu_thread_time_allowed_ = 0;
return false;
} else {
// This is the start of allowing more thread time.
if (count_of_more_gpu_thread_time_allowed_ == 0) {
WatchedThreadNeedsMoreThreadTimeHistogram(
no_gpu_hang_detected, /*start_of_more_thread_time*/ true);
}
count_of_more_gpu_thread_time_allowed_++;
return true;
}
#else
return false;
#endif
}
#if defined(OS_WIN)
// Returns the thread time consumed so far by the watched thread. Uses
// base::ThreadTicks when supported and falls back to GetThreadTimes()
// (kernel + user time) otherwise. A default-constructed ThreadTicks is
// returned if GetThreadTimes() fails.
base::ThreadTicks GpuWatchdogThreadImplV2::GetWatchedThreadTime() {
  DCHECK(watched_thread_handle_);

  if (base::ThreadTicks::IsSupported()) {
    // Note: GetForThread() might return bogus results if running on different
    // CPUs between two calls.
    return base::ThreadTicks::GetForThread(
        base::PlatformThreadHandle(watched_thread_handle_));
  }

  FILETIME creation_time;
  FILETIME exit_time;
  FILETIME kernel_time;
  FILETIME user_time;
  if (!GetThreadTimes(watched_thread_handle_, &creation_time, &exit_time,
                      &kernel_time, &user_time)) {
    return base::ThreadTicks();
  }

  // Need to bit_cast to fix alignment, then divide by 10 to convert
  // 100-nanoseconds to microseconds.
  const int64_t user_time_us = bit_cast<int64_t, FILETIME>(user_time) / 10;
  const int64_t kernel_time_us = bit_cast<int64_t, FILETIME>(kernel_time) / 10;
  const base::TimeDelta cpu_time =
      base::TimeDelta::FromMicroseconds(user_time_us + kernel_time_us);
  return base::ThreadTicks() + cpu_time;
}
#endif
// Returns true when a hang was detected (|no_gpu_hang| is false) and the
// watched thread is granted one more timeout cycle. At most
// |max_extra_cycles_before_kill_| consecutive cycles are granted; the count is
// reset as soon as no hang is detected.
bool GpuWatchdogThreadImplV2::WatchedThreadGetsExtraTimeout(bool no_gpu_hang) {
  // Extra cycles are disabled.
  if (max_extra_cycles_before_kill_ == 0)
    return false;

  // We want to record histograms even if there is no gpu hang.
  WatchedThreadGetsExtraTimeoutHistogram(no_gpu_hang);

  if (no_gpu_hang) {
    if (count_of_extra_cycles_ > 0)
      count_of_extra_cycles_ = 0;
    return false;
  }

  // All extra cycles have been used up.
  if (!(count_of_extra_cycles_ < max_extra_cycles_before_kill_))
    return false;

  count_of_extra_cycles_++;
  return true;
}
// Running on the watchdog thread. Called when OnWatchdogTimeout() concludes
// that the watched thread hangs. In test mode only a flag is set; on Windows
// nothing happens while a debugger is attached. Otherwise the watchdog state
// is made available to the crash dump, a dump is created and the current
// process is terminated with RESULT_CODE_HUNG.
void GpuWatchdogThreadImplV2::DeliberatelyTerminateToRecoverFromHang() {
DCHECK(watchdog_thread_task_runner_->BelongsToCurrentThread());
// If this is for gpu testing, do not terminate the gpu process.
if (is_test_mode_) {
test_result_timeout_and_gpu_hang_.Set();
return;
}
#if defined(OS_WIN)
if (IsDebuggerPresent())
return;
#endif
// Store variables so they're available in crash dumps to help determine the
// cause of any hang.
base::TimeTicks function_begin_timeticks = base::TimeTicks::Now();
base::debug::Alias(&in_gpu_initialization_);
base::debug::Alias(&num_of_timeout_after_power_resume_);
base::debug::Alias(&num_of_timeout_after_foregrounded_);
base::debug::Alias(&function_begin_timeticks);
base::debug::Alias(&watchdog_start_timeticks_);
base::debug::Alias(&power_suspend_timeticks_);
base::debug::Alias(&power_resume_timeticks_);
base::debug::Alias(&backgrounded_timeticks_);
base::debug::Alias(&foregrounded_timeticks_);
base::debug::Alias(&watchdog_pause_timeticks_);
base::debug::Alias(&watchdog_resume_timeticks_);
base::debug::Alias(&in_power_suspension_);
base::debug::Alias(&in_gpu_process_teardown_);
base::debug::Alias(&is_backgrounded_);
base::debug::Alias(&is_add_power_observer_called_);
base::debug::Alias(&is_power_observer_added_);
base::debug::Alias(&last_on_watchdog_timeout_timeticks_);
// Time elapsed since the timeout bookkeeping was last updated.
base::TimeDelta timeticks_elapses =
function_begin_timeticks - last_on_watchdog_timeout_timeticks_;
base::debug::Alias(&timeticks_elapses);
base::debug::Alias(&max_extra_cycles_before_kill_);
#if defined(OS_WIN)
base::debug::Alias(&remaining_watched_thread_ticks_);
base::debug::Alias(&less_than_full_thread_time_after_capped_);
#endif
GpuWatchdogHistogram(GpuWatchdogThreadEvent::kGpuWatchdogKill);
// Crash keys attached to the dump created below.
crash_keys::gpu_watchdog_crashed_in_gpu_init.Set(
in_gpu_initialization_ ? "1" : "0");
crash_keys::gpu_watchdog_kill_after_power_resume.Set(
WithinOneMinFromPowerResumed() ? "1" : "0");
crash_keys::num_of_processors.Set(base::NumberToString(num_of_processors_));
// Check the arm_disarm_counter value one more time.
auto last_arm_disarm_counter = ReadArmDisarmCounter();
base::debug::Alias(&last_arm_disarm_counter);
// Use RESULT_CODE_HUNG so this crash is separated from other
// EXCEPTION_ACCESS_VIOLATION buckets for UMA analysis.
// Create a crash dump first. TerminateCurrentProcessImmediately will not
// create a dump.
base::debug::DumpWithoutCrashing();
base::Process::TerminateCurrentProcessImmediately(RESULT_CODE_HUNG);
}
// Records |thread_event| in the "GPU.WatchdogThread.Event" enumeration
// histogram.
void GpuWatchdogThreadImplV2::GpuWatchdogHistogram(
GpuWatchdogThreadEvent thread_event) {
base::UmaHistogramEnumeration("GPU.WatchdogThread.Event", thread_event);
}
// Records |timeout_event| in "GPU.WatchdogThread.Timeout" and in every
// situation-specific variant that currently applies (.Init, .PowerResume,
// .Foregrounded). When none of them applies, the .Normal variant is used.
void GpuWatchdogThreadImplV2::GpuWatchdogTimeoutHistogram(
    GpuWatchdogTimeoutEvent timeout_event) {
  base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout", timeout_event);

  const bool during_init = in_gpu_initialization_;
  const bool after_power_resume = WithinOneMinFromPowerResumed();
  const bool after_foregrounded = WithinOneMinFromForegrounded();

  if (during_init) {
    base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout.Init",
                                  timeout_event);
  }
  if (after_power_resume) {
    base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout.PowerResume",
                                  timeout_event);
  }
  if (after_foregrounded) {
    base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout.Foregrounded",
                                  timeout_event);
  }

  const bool special_case =
      during_init || after_power_resume || after_foregrounded;
  if (!special_case) {
    base::UmaHistogramEnumeration("GPU.WatchdogThread.Timeout.Normal",
                                  timeout_event);
  }
}
#if defined(OS_WIN)
// Records the number of timeouts the GPU main thread needed to make progress
// after OnWatchdogTimeout() was triggered, i.e. the current value of
// |count_of_more_gpu_thread_time_allowed_|. The value goes into
// "GPU.WatchdogThread.ExtraThreadTime" and into every situation-specific
// variant that applies (.Init, .PowerResume, .Foregrounded), or into .Normal
// when none applies.
void GpuWatchdogThreadImplV2::RecordExtraThreadTimeHistogram() {
  // Histogram range: counts from 1 to 6 in 6 buckets.
  constexpr int kMin = 1;
  constexpr int kMax = 6;
  constexpr int kBuckets = 6;

  const int extension_count = count_of_more_gpu_thread_time_allowed_;
  base::UmaHistogramCustomCounts("GPU.WatchdogThread.ExtraThreadTime",
                                 extension_count, kMin, kMax, kBuckets);

  const bool during_init = in_gpu_initialization_;
  const bool after_power_resume = WithinOneMinFromPowerResumed();
  const bool after_foregrounded = WithinOneMinFromForegrounded();

  if (during_init) {
    base::UmaHistogramCustomCounts("GPU.WatchdogThread.ExtraThreadTime.Init",
                                   extension_count, kMin, kMax, kBuckets);
  }
  if (after_power_resume) {
    base::UmaHistogramCustomCounts(
        "GPU.WatchdogThread.ExtraThreadTime.PowerResume", extension_count,
        kMin, kMax, kBuckets);
  }
  if (after_foregrounded) {
    base::UmaHistogramCustomCounts(
        "GPU.WatchdogThread.ExtraThreadTime.Foregrounded", extension_count,
        kMin, kMax, kBuckets);
  }

  const bool special_case =
      during_init || after_power_resume || after_foregrounded;
  if (!special_case) {
    base::UmaHistogramCustomCounts("GPU.WatchdogThread.ExtraThreadTime.Normal",
                                   extension_count, kMin, kMax, kBuckets);
  }
}
// Records |count| in the "GPU.WatchdogThread.ExtraThreadTime.NumOfUsers"
// exact-linear histogram. Callers in this file pass the number of extra
// thread-time extensions granted so far (0 at the start of an extension).
void GpuWatchdogThreadImplV2::
RecordNumOfUsersWaitingWithExtraThreadTimeHistogram(int count) {
// Exclusive upper bound of the histogram.
constexpr int kMax = 4;
base::UmaHistogramExactLinear("GPU.WatchdogThread.ExtraThreadTime.NumOfUsers",
count, kMax);
}
// Records the metrics for WatchedThreadNeedsMoreThreadTime().
// |start_of_more_thread_time| is true for the call that marks the first
// extension of a detected hang; otherwise the call reports on extensions
// granted in earlier timeouts, if any.
void GpuWatchdogThreadImplV2::WatchedThreadNeedsMoreThreadTimeHistogram(
    bool no_gpu_hang_detected,
    bool start_of_more_thread_time) {
  if (start_of_more_thread_time) {
    // This is the start of allowing more thread time. Only record it once for
    // all following timeouts on the same detected gpu hang, so we know this
    // is equivalent to one crash in our crash reports.
    GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kMoreThreadTime);
    RecordNumOfUsersWaitingWithExtraThreadTimeHistogram(0);
    return;
  }

  // Nothing to report unless extra thread time was granted in a previous
  // OnWatchdogTimeout().
  if (!(count_of_more_gpu_thread_time_allowed_ > 0))
    return;

  if (no_gpu_hang_detected) {
    // Extra time was extended earlier and the gpu has now made progress.
    GpuWatchdogTimeoutHistogram(
        GpuWatchdogTimeoutEvent::kProgressAfterMoreThreadTime);
    RecordExtraThreadTimeHistogram();
  } else if (count_of_more_gpu_thread_time_allowed_ >=
             kMaxCountOfMoreGpuThreadTimeAllowed) {
    // Still hanging after the maximum number of extensions.
    GpuWatchdogTimeoutHistogram(
        GpuWatchdogTimeoutEvent::kLessThanFullThreadTimeAfterCapped);
  }

  // Records the number of users who are still waiting. We can use this
  // number to calculate the number of users who had already quit.
  RecordNumOfUsersWaitingWithExtraThreadTimeHistogram(
      count_of_more_gpu_thread_time_allowed_);

  // Used by GPU.WatchdogThread.WaitTime later
  time_in_wait_for_full_thread_time_ =
      count_of_more_gpu_thread_time_allowed_ * watchdog_timeout_;
}
#endif
// Records the metrics for WatchedThreadGetsExtraTimeout(). Must be called
// before |count_of_extra_cycles_| is updated for the current timeout.
void GpuWatchdogThreadImplV2::WatchedThreadGetsExtraTimeoutHistogram(
    bool no_gpu_hang) {
  constexpr int kMax = 60;

  // A hang was detected and no extra cycle has been used yet: the wait begins.
  if (count_of_extra_cycles_ == 0 && !no_gpu_hang) {
    GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kTimeoutWait);
    base::UmaHistogramExactLinear("GPU.WatchdogThread.WaitTime.NumOfUsers", 0,
                                  kMax);
    return;
  }

  // Nothing else to record unless extra cycles were already granted.
  if (!(count_of_extra_cycles_ > 0))
    return;

  // Seconds spent so far in extra timeout cycles.
  const int waited_seconds =
      watchdog_timeout_.InSeconds() * count_of_extra_cycles_;
  base::UmaHistogramExactLinear("GPU.WatchdogThread.WaitTime.NumOfUsers",
                                waited_seconds, kMax);

  if (!no_gpu_hang)
    return;

  GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent::kProgressAfterWait);
  base::UmaHistogramExactLinear(
      "GPU.WatchdogThread.WaitTime.ProgressAfterWait", waited_seconds, kMax);

#if defined(OS_WIN)
  // Add the time the GPU thread was given for the full thread time up to 60
  // seconds. GPU.WatchdogThread.WaitTime is essentially equal to
  // GPU.WatchdogThread.WaitTime.ProgressAfterWait on non-Windows systems.
  base::TimeDelta total_wait = base::TimeDelta::FromSeconds(waited_seconds);
  total_wait += time_in_wait_for_full_thread_time_;

  constexpr base::TimeDelta kMinTime = base::TimeDelta::FromSeconds(1);
  constexpr base::TimeDelta kMaxTime = base::TimeDelta::FromSeconds(150);
  constexpr int kBuckets = 50;

  // The time the GPU main thread takes to finish a task after a "hang" is
  // detected.
  base::UmaHistogramCustomTimes("GPU.WatchdogThread.WaitTime", total_wait,
                                kMinTime, kMaxTime, kBuckets);
#endif
}
// Used for metrics. Returns true when a power resume has been seen and no
// more timeouts than fit into one minute have elapsed since then.
bool GpuWatchdogThreadImplV2::WithinOneMinFromPowerResumed() {
  // Number of whole watchdog timeouts that fit into one minute.
  const size_t timeouts_per_minute = base::ClampFloor<size_t>(
      base::TimeDelta::FromMinutes(1) / watchdog_timeout_);

  if (!power_resumed_event_)
    return false;
  return num_of_timeout_after_power_resume_ <= timeouts_per_minute;
}
// Used for metrics. Returns true when a foregrounding has been seen and no
// more timeouts than fit into one minute have elapsed since then.
bool GpuWatchdogThreadImplV2::WithinOneMinFromForegrounded() {
  // Number of whole watchdog timeouts that fit into one minute.
  const size_t timeouts_per_minute = base::ClampFloor<size_t>(
      base::TimeDelta::FromMinutes(1) / watchdog_timeout_);

  if (!foregrounded_event_)
    return false;
  return num_of_timeout_after_foregrounded_ <= timeouts_per_minute;
}
#if defined(USE_X11)
// Re-reads the active TTY number from |tty_file_|. The previous value is kept
// in |last_active_tty_|; |active_tty_| becomes -1 when the file is not open,
// cannot be read, or does not contain "tty<number>".
void GpuWatchdogThreadImplV2::UpdateActiveTTY() {
  last_active_tty_ = active_tty_;
  active_tty_ = -1;

  if (!tty_file_)
    return;
  // Rewind so that the current content is read from the start.
  if (fseek(tty_file_, 0, SEEK_SET) != 0)
    return;

  // Read at most 7 bytes; the zero-initialized 8th byte terminates the string.
  char contents[8] = {0};
  if (fread(contents, 1, 7, tty_file_) == 0)
    return;

  int parsed_tty = 0;
  if (sscanf(contents, "tty%d\n", &parsed_tty) == 1)
    active_tty_ = parsed_tty;
}
#endif
// X11 only; always returns false elsewhere. Returns true when the currently
// active TTY differs from the TTY that was active when the watchdog was
// created, in which case the watchdog continues instead of killing the
// process. Returns false when either TTY is unknown (-1).
bool GpuWatchdogThreadImplV2::ContinueOnNonHostX11ServerTty() {
#if defined(USE_X11)
if (host_tty_ == -1 || active_tty_ == -1)
return false;
// Don't crash if we're not on the TTY of our host X11 server.
if (active_tty_ != host_tty_) {
// Only record for the time there is a change on TTY
// NOTE(review): the condition below holds when the active TTY is the same as
// at the previous UpdateActiveTTY() call, i.e. when it did NOT change since
// the last timeout. This reads as the opposite of the comment above; confirm
// whether `!=` was intended.
if (last_active_tty_ == active_tty_) {
GpuWatchdogTimeoutHistogram(
GpuWatchdogTimeoutEvent::kContinueOnNonHostServerTty);
}
return true;
}
#endif
return false;
}
// For gpu testing only. Return whether a GPU hang was detected or not.
// The flag is set by DeliberatelyTerminateToRecoverFromHang() when the
// watchdog runs in test mode.
bool GpuWatchdogThreadImplV2::IsGpuHangDetectedForTesting() {
DCHECK(is_test_mode_);
return test_result_timeout_and_gpu_hang_.IsSet();
}
// This should be called on the test main thread only. It will wait until the
// power observer is added on the watchdog thread.
// AddPowerObserver() must have been called before.
void GpuWatchdogThreadImplV2::WaitForPowerObserverAddedForTesting() {
DCHECK(watched_gpu_task_runner_->BelongsToCurrentThread());
DCHECK(is_add_power_observer_called_);
// Just return if it has been added.
if (is_power_observer_added_)
return;
// Post a task to the watchdog thread that signals |event| and block until it
// has run. Presumably the watchdog thread runs tasks in posting order, so the
// earlier posted OnAddPowerObserver() has completed by then -- TODO confirm.
base::WaitableEvent event;
task_runner()->PostTask(
FROM_HERE,
base::BindOnce(&base::WaitableEvent::Signal, base::Unretained(&event)));
event.Wait();
}
} // namespace gpu
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef GPU_IPC_SERVICE_GPU_WATCHDOG_THREAD_V2_H_
#define GPU_IPC_SERVICE_GPU_WATCHDOG_THREAD_V2_H_
#include "build/build_config.h"
#include "gpu/ipc/service/gpu_watchdog_thread.h"
namespace gpu {
#if defined(OS_WIN)
// If the actual time the watched GPU thread spent doing actual work is less
// than the watchdog timeout, the GPU thread can continue running through
// OnWatchdogTimeout() for at most 4 times before the gpu thread is killed.
constexpr int kMaxCountOfMoreGpuThreadTimeAllowed = 3;
#endif
// Default for the maximum number of extra timeout cycles granted after a hang
// is detected and before the gpu process is killed. With 0, no extra cycle is
// granted.
constexpr int kMaxExtraCyclesBeforeKill = 0;
class GPU_IPC_SERVICE_EXPORT GpuWatchdogThreadImplV2
: public GpuWatchdogThread,
public base::TaskObserver {
public:
static std::unique_ptr<GpuWatchdogThreadImplV2> Create(
bool start_backgrounded);
static std::unique_ptr<GpuWatchdogThreadImplV2> Create(
bool start_backgrounded,
base::TimeDelta timeout,
int init_factor,
int restart_factor,
int max_extra_cycles_before_kill,
bool test_mode);
~GpuWatchdogThreadImplV2() override;
// Implements GpuWatchdogThread.
void AddPowerObserver() override;
void OnBackgrounded() override;
void OnForegrounded() override;
void OnInitComplete() override;
void OnGpuProcessTearDown() override;
void ResumeWatchdog() override;
void PauseWatchdog() override;
bool IsGpuHangDetectedForTesting() override;
void WaitForPowerObserverAddedForTesting() override;
// Implements base::Thread.
void Init() override;
void CleanUp() override;
// Implements gl::ProgressReporter.
void ReportProgress() override;
// Implements TaskObserver.
void WillProcessTask(const base::PendingTask& pending_task,
bool was_blocked_or_low_priority) override;
void DidProcessTask(const base::PendingTask& pending_task) override;
// Implements base::PowerObserver.
void OnSuspend() override;
void OnResume() override;
private:
enum PauseResumeSource {
kAndroidBackgroundForeground = 0,
kPowerSuspendResume = 1,
kGeneralGpuFlow = 2,
};
GpuWatchdogThreadImplV2(base::TimeDelta timeout,
int init_factor,
int restart_factor,
int max_extra_cycles_before_kill,
bool test_mode);
void OnAddPowerObserver();
void RestartWatchdogTimeoutTask(PauseResumeSource source_of_request);
void StopWatchdogTimeoutTask(PauseResumeSource source_of_request);
void UpdateInitializationFlag();
void Arm();
void Disarm();
void InProgress();
bool IsArmed();
base::subtle::Atomic32 ReadArmDisarmCounter();
void OnWatchdogTimeout();
bool SlowWatchdogThread();
bool WatchedThreadNeedsMoreThreadTime(bool no_gpu_hang_detected);
#if defined(OS_WIN)
base::ThreadTicks GetWatchedThreadTime();
#endif
bool WatchedThreadGetsExtraTimeout(bool no_gpu_hang);
// Do not change the function name. It is used for [GPU HANG] crash reports.
void DeliberatelyTerminateToRecoverFromHang();
// Records "GPU.WatchdogThread.Event".
void GpuWatchdogHistogram(GpuWatchdogThreadEvent thread_event);
// Records "GPU.WatchdogThread.Timeout". This histogram is recorded in
// OnWatchdogTimeout().
void GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent timeout_event);
#if defined(OS_WIN)
// The extra thread time the GPU main thread needs to make progress.
// Records "GPU.WatchdogThread.ExtraThreadTime".
void RecordExtraThreadTimeHistogram();
// The number of users, per timeout, who stay in Chrome after being given
// extra thread time. Records "GPU.WatchdogThread.ExtraThreadTime.NumOfUsers"
// and "GPU.WatchdogThread.Timeout".
void RecordNumOfUsersWaitingWithExtraThreadTimeHistogram(int count);
// Histograms recorded for the WatchedThreadNeedsMoreThreadTime() function.
void WatchedThreadNeedsMoreThreadTimeHistogram(
bool no_gpu_hang_detected,
bool start_of_more_thread_time);
#endif
// The number of users who stay in Chrome after the extra timeout wait cycles.
// Records "GPU.WatchdogThread.WaitTime.ProgressAfterWait",
// "GPU.WatchdogThread.WaitTime.NumOfUsers" and "GPU.WatchdogThread.Timeout".
void WatchedThreadGetsExtraTimeoutHistogram(bool no_gpu_hang);
// Used for metrics. It's 1 minute after the event.
// NOTE(review): presumably each returns true while less than one minute has
// passed since power_resume_timeticks_ / foregrounded_timeticks_
// respectively - confirm in the .cc.
bool WithinOneMinFromPowerResumed();
bool WithinOneMinFromForegrounded();
#if defined(USE_X11)
// X11 builds only. NOTE(review): presumably refreshes active_tty_ (and
// last_active_tty_) by reading tty_file_ - confirm in the .cc.
void UpdateActiveTTY();
#endif
// The watchdog continues when it's not on the TTY of our host X11 server.
// Declared on every platform, unlike UpdateActiveTTY() above.
bool ContinueOnNonHostX11ServerTty();
// This counter is only written on the gpu thread, and read on both threads.
volatile base::subtle::Atomic32 arm_disarm_counter_ = 0;
// The counter number read in the last OnWatchdogTimeout() on the watchdog
// thread.
int32_t last_arm_disarm_counter_ = 0;
// Timeout on the watchdog thread to check if gpu hangs.
base::TimeDelta watchdog_timeout_;
// The one-time watchdog timeout multiplier in the gpu initialization.
int watchdog_init_factor_;
// The one-time watchdog timeout multiplier after the watchdog pauses and
// restarts.
int watchdog_restart_factor_;
// The time the gpu watchdog was created.
base::TimeTicks watchdog_start_timeticks_;
// The times OnSuspend and OnResume were last called.
base::TimeTicks power_suspend_timeticks_;
base::TimeTicks power_resume_timeticks_;
// The times OnBackgrounded and OnForegrounded were last called.
base::TimeTicks backgrounded_timeticks_;
base::TimeTicks foregrounded_timeticks_;
// The times PauseWatchdog and ResumeWatchdog were called.
base::TimeTicks watchdog_pause_timeticks_;
base::TimeTicks watchdog_resume_timeticks_;
// TimeTicks: Tracking the amount of time a task runs. Executing delayed
// tasks at the right time.
// ThreadTicks: Use this timer to (approximately) measure how much time the
// calling thread spent doing actual work vs. being de-scheduled.
// The time the last OnWatchdogTimeout() was called.
base::TimeTicks last_on_watchdog_timeout_timeticks_;
// The wall-clock time the next OnWatchdogTimeout() will be called.
base::Time next_on_watchdog_timeout_time_;
#if defined(OS_WIN)
base::ThreadTicks last_on_watchdog_timeout_thread_ticks_;
// The difference between the timeout and the actual time the watched thread
// spent doing actual work.
base::TimeDelta remaining_watched_thread_ticks_;
// The Windows thread handle of the watched GPU main thread.
void* watched_thread_handle_ = nullptr;
// After GPU hang detected, how many times has the GPU thread been allowed to
// continue due to not enough thread time.
int count_of_more_gpu_thread_time_allowed_ = 0;
// The total timeout, up to 60 seconds, the watchdog thread waits for the GPU
// main thread to get full thread time.
base::TimeDelta time_in_wait_for_full_thread_time_;
// After detecting GPU hang and continuing running through
// OnGpuWatchdogTimeout for the max cycles, the GPU main thread still cannot
// get the full thread time.
bool less_than_full_thread_time_after_capped_ = false;
#endif
#if defined(USE_X11)
FILE* tty_file_ = nullptr;
int host_tty_ = -1;
int active_tty_ = -1;
int last_active_tty_ = -1;
#endif
// The system has entered the power suspension mode.
bool in_power_suspension_ = false;
// The GPU process has started tearing down. Accessed only in the gpu process.
bool in_gpu_process_teardown_ = false;
// Chrome is running in the background on Android. The GPU is probably very
// slow or stalled.
bool is_backgrounded_ = false;
// The GPU watchdog is paused. The timeout task is temporarily stopped.
bool is_paused_ = false;
// Whether the watchdog thread has been called and added to the power monitor
// observer.
bool is_add_power_observer_called_ = false;
bool is_power_observer_added_ = false;
// Whether GpuWatchdogThreadEvent::kGpuWatchdogStart has been recorded.
bool is_watchdog_start_histogram_recorded = false;
// Read/Write by the watchdog thread only after initialized in the
// constructor.
bool in_gpu_initialization_ = false;
// The number of logical processors/cores on the current machine.
int num_of_processors_ = 0;
// Don't kill the GPU process immediately after a gpu hang is detected. Wait
// for extra cycles of timeout. Kill it, if the GPU still doesn't respond
// after wait.
const int max_extra_cycles_before_kill_;
// How many timeout cycles have elapsed since a hang was detected.
int count_of_extra_cycles_ = 0;
// For experiments and debugging purposes.
size_t num_of_timeout_after_power_resume_ = 0;
size_t num_of_timeout_after_foregrounded_ = 0;
bool foregrounded_event_ = false;
bool power_resumed_event_ = false;
// For gpu testing only.
const bool is_test_mode_;
// Set by the watchdog thread and read by the test thread.
base::AtomicFlag test_result_timeout_and_gpu_hang_;
scoped_refptr<base::SingleThreadTaskRunner> watched_gpu_task_runner_;
scoped_refptr<base::SingleThreadTaskRunner> watchdog_thread_task_runner_;
base::WeakPtr<GpuWatchdogThreadImplV2> weak_ptr_;
base::WeakPtrFactory<GpuWatchdogThreadImplV2> weak_factory_{this};
DISALLOW_COPY_AND_ASSIGN(GpuWatchdogThreadImplV2);
};
} // namespace gpu
#endif // GPU_IPC_SERVICE_GPU_WATCHDOG_THREAD_V2_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment