Commit 5d7fb33e authored by kylechar's avatar kylechar Committed by Commit Bot

Add GpuMode and unify fallback logic.

This CL fixes an issue where if GPU process initialization fails and
then GPU process crashes we end up incrementing the wrong UMA histogram
and recent crash count. This happens because when the GPU process
initialization fails we do a GPU mode fallback, e.g. disable hardware
acceleration or SwiftShader, which modifies values in
GpuDataManagerImplPrivate. When the GPU process crashes, it checks the
values in GpuDataManagerImplPrivate that were already changed.

For example, if hardware acceleration was tried and GPU process
initialization fails then hardware acceleration will be disabled from
GpuProcessHost::DidFailInitialize(). When the GPU process crashes,
GpuProcessHost::RecordProcessCrash() will increment the SwiftShader UMA
and recent crash count instead of hardware accelerated UMA and crash
count.

Add GpuMode which tracks this information and have GpuProcessHost store
this on initialization. This way the same UMA histograms are incremented
on launch and crash. Also make sure after DidFailInitialize() that
RecordProcessCrash() doesn't also trigger GPU mode fallback.  Move the
fallback logic into GpuDataManagerImplPrivate::GpuModeFallback() to
ensure it's the same for all cases.

Change Chrome OS to also LOG(FATAL) if GPU process crashes too many
times or fails to initialize. Chrome OS always crashed in this case, it
would just crash later in GpuProcessTransportFactory. Also increase max
crashes for Chrome OS from 3 to 6 to match Android.

Bug: 852063
Cq-Include-Trybots: luci.chromium.try:android_optional_gpu_tests_rel;luci.chromium.try:linux_optional_gpu_tests_rel;luci.chromium.try:mac_optional_gpu_tests_rel;luci.chromium.try:win_optional_gpu_tests_rel
Change-Id: I6870d0d877cdc2766d54428f90517c34cbd54343
Reviewed-on: https://chromium-review.googlesource.com/1099721
Commit-Queue: kylechar <kylechar@chromium.org>
Reviewed-by: Zhenyao Mo <zmo@chromium.org>
Reviewed-by: Antoine Labour <piman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#567278}
parent 03ca85bd
......@@ -207,9 +207,14 @@ void GpuDataManagerImpl::NotifyGpuInfoUpdate() {
private_->NotifyGpuInfoUpdate();
}
void GpuDataManagerImpl::OnGpuProcessInitFailure() {
gpu::GpuMode GpuDataManagerImpl::GetGpuMode() const {
base::AutoLock auto_lock(lock_);
private_->OnGpuProcessInitFailure();
return private_->GetGpuMode();
}
// Thread-safe entry point: holds |lock_| for the duration of the call and
// forwards to |private_|, which performs the actual mode fallback.
void GpuDataManagerImpl::FallBackToNextGpuMode() {
base::AutoLock auto_lock(lock_);
private_->FallBackToNextGpuMode();
}
bool GpuDataManagerImpl::IsGpuProcessUsingHardwareGpu() const {
......
......@@ -24,6 +24,7 @@
#include "gpu/config/gpu_control_list.h"
#include "gpu/config/gpu_feature_info.h"
#include "gpu/config/gpu_info.h"
#include "gpu/config/gpu_mode.h"
class GURL;
......@@ -149,8 +150,14 @@ class CONTENT_EXPORT GpuDataManagerImpl : public GpuDataManager {
// status update.
void NotifyGpuInfoUpdate();
// Called when GPU process initialization failed.
void OnGpuProcessInitFailure();
// Return mode describing what the GPU process will be launched to run.
gpu::GpuMode GetGpuMode() const;
// Called when GPU process initialization failed or the GPU process has
// crashed repeatedly. This will try to disable hardware acceleration and then
// SwiftShader WebGL. It will also crash the browser process as a last resort
// on Android and Chrome OS.
void FallBackToNextGpuMode();
void BlockSwiftShader();
bool SwiftShaderAllowed() const;
......
......@@ -869,21 +869,43 @@ bool GpuDataManagerImplPrivate::NeedsCompleteGpuInfoCollection() const {
#endif
}
void GpuDataManagerImplPrivate::OnGpuProcessInitFailure() {
// Reports the mode the GPU process would be launched in right now. Candidates
// are checked in priority order: hardware acceleration, then SwiftShader, then
// the Viz display compositor. If none applies the result is DISABLED.
gpu::GpuMode GpuDataManagerImplPrivate::GetGpuMode() const {
  if (HardwareAccelerationEnabled())
    return gpu::GpuMode::HARDWARE_ACCELERATED;

  if (SwiftShaderAllowed())
    return gpu::GpuMode::SWIFTSHADER;

  // Neither hardware GL nor SwiftShader is available; the GPU process is only
  // still useful when it hosts the Viz display compositor.
  const bool viz_display_compositor_enabled =
      base::FeatureList::IsEnabled(features::kVizDisplayCompositor);
  return viz_display_compositor_enabled ? gpu::GpuMode::DISPLAY_COMPOSITOR
                                        : gpu::GpuMode::DISABLED;
}
void GpuDataManagerImplPrivate::FallBackToNextGpuMode() {
#if defined(OS_ANDROID) || defined(OS_CHROMEOS)
// Android and Chrome OS can't switch to software compositing. If the GPU
// process initialization fails or GPU process is too unstable then crash the
// browser process to reset everything.
LOG(FATAL) << "GPU process isn't usable. Goodbye.";
#else
// TODO(kylechar): Use GpuMode to store the current mode instead of
// multiple bools.
if (!card_disabled_) {
DisableHardwareAcceleration();
return;
}
if (SwiftShaderAllowed()) {
} else if (SwiftShaderAllowed()) {
BlockSwiftShader();
return;
}
if (!base::FeatureList::IsEnabled(features::kVizDisplayCompositor)) {
// When Viz display compositor is not enabled, if GPU process fails to
// launch with hardware GPU, and then fails to launch with SwiftShader if
// available, then GPU process should not launch again.
} else if (base::FeatureList::IsEnabled(features::kVizDisplayCompositor)) {
// The GPU process is frequently crashing with only the display compositor
// running. This should never happen so something is wrong. Crash the
// browser process to reset everything.
LOG(FATAL) << "The display compositor is frequently crashing. Goodbye.";
} else {
// We are already at GpuMode::DISABLED. We shouldn't be launching the GPU
// process for it to fail.
NOTREACHED();
}
#endif
}
} // namespace content
......@@ -99,7 +99,8 @@ class CONTENT_EXPORT GpuDataManagerImplPrivate {
bool UpdateActiveGpu(uint32_t vendor_id, uint32_t device_id);
void OnGpuProcessInitFailure();
gpu::GpuMode GetGpuMode() const;
void FallBackToNextGpuMode();
// Notify all observers whenever there is a GPU info update.
void NotifyGpuInfoUpdate();
......
......@@ -884,6 +884,9 @@ bool GpuProcessHost::Init() {
process_->GetHost()->CreateChannelMojo();
mode_ = GpuDataManagerImpl::GetInstance()->GetGpuMode();
DCHECK_NE(mode_, gpu::GpuMode::DISABLED);
if (in_process_) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
DCHECK(GetGpuMainThreadFactory());
......@@ -1204,7 +1207,7 @@ void GpuProcessHost::DidFailInitialize() {
UMA_HISTOGRAM_BOOLEAN("GPU.GPUProcessInitialized", false);
status_ = FAILURE;
GpuDataManagerImpl* gpu_data_manager = GpuDataManagerImpl::GetInstance();
gpu_data_manager->OnGpuProcessInitFailure();
gpu_data_manager->FallBackToNextGpuMode();
RunRequestGPUInfoCallbacks(gpu_data_manager->GetGPUInfo());
}
......@@ -1416,6 +1419,8 @@ bool GpuProcessHost::LaunchGpuProcess() {
GetContentClient()->browser()->AppendExtraCommandLineSwitches(
cmd_line.get(), process_->GetData().id);
// TODO(kylechar): The command line flags added here should be based on
// |mode_|.
GpuDataManagerImpl::GetInstance()->AppendGpuCommandLine(cmd_line.get());
bool swiftshader_rendering =
(cmd_line->GetSwitchValueASCII(switches::kUseGL) ==
......@@ -1438,14 +1443,13 @@ bool GpuProcessHost::LaunchGpuProcess() {
process_launched_ = true;
if (kind_ == GPU_PROCESS_KIND_SANDBOXED) {
auto* gpu_data_manager = GpuDataManagerImpl::GetInstance();
if (gpu_data_manager->HardwareAccelerationEnabled()) {
if (mode_ == gpu::GpuMode::HARDWARE_ACCELERATED) {
UMA_HISTOGRAM_ENUMERATION(kProcessLifetimeEventsHardwareAccelerated,
LAUNCHED, GPU_PROCESS_LIFETIME_EVENT_MAX);
} else if (gpu_data_manager->SwiftShaderAllowed()) {
} else if (mode_ == gpu::GpuMode::SWIFTSHADER) {
UMA_HISTOGRAM_ENUMERATION(kProcessLifetimeEventsSwiftShader, LAUNCHED,
GPU_PROCESS_LIFETIME_EVENT_MAX);
} else {
} else if (mode_ == gpu::GpuMode::DISPLAY_COMPOSITOR) {
UMA_HISTOGRAM_ENUMERATION(kProcessLifetimeEventsDisplayCompositor,
LAUNCHED, GPU_PROCESS_LIFETIME_EVENT_MAX);
}
......@@ -1495,83 +1499,64 @@ void GpuProcessHost::BlockLiveOffscreenContexts() {
}
void GpuProcessHost::RecordProcessCrash() {
#if !defined(OS_ANDROID)
// Maximum number of times the GPU process is allowed to crash in a session.
// Once this limit is reached, any request to launch the GPU process will
// fail.
const int kGpuMaxCrashCount = 3;
#if !defined(OS_ANDROID) && !defined(OS_CHROMEOS)
// Maximum number of times the GPU process can crash before we try something
// different, like disabling hardware acceleration or all GL.
constexpr int kGpuFallbackCrashCount = 3;
#else
// On android there is no way to recover without gpu, and the OS can kill the
// gpu process arbitrarily, so use a higher count to allow for that.
const int kGpuMaxCrashCount = 6;
// Android and Chrome OS switch to software compositing and fallback crashes
// the browser process. For Android the OS can also kill the GPU process
// arbitrarily. Use a larger maximum crash count here.
constexpr int kGpuFallbackCrashCount = 6;
#endif
bool disable_crash_limit = base::CommandLine::ForCurrentProcess()->HasSwitch(
switches::kDisableGpuProcessCrashLimit);
// Ending only acts as a failure if the GPU process was actually started and
// was intended for actual rendering (and not just checking caps or other
// options).
if (process_launched_ && kind_ == GPU_PROCESS_KIND_SANDBOXED) {
// Keep track of the total number of GPU crashes.
base::subtle::NoBarrier_AtomicIncrement(&gpu_crash_count_, 1);
if (GpuDataManagerImpl::GetInstance()->HardwareAccelerationEnabled()) {
IncrementCrashCount(kForgiveGpuCrashMinutes,
&hardware_accelerated_recent_crash_count_);
UMA_HISTOGRAM_EXACT_LINEAR(
kProcessLifetimeEventsHardwareAccelerated,
DIED_FIRST_TIME + hardware_accelerated_recent_crash_count_ - 1,
static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX));
if ((hardware_accelerated_recent_crash_count_ >= kGpuMaxCrashCount ||
status_ == FAILURE) &&
!disable_crash_limit) {
#if defined(OS_ANDROID)
// Android can not fall back to software. If things are too unstable
// then we just crash chrome to reset everything. Sorry.
LOG(FATAL) << "Unable to start gpu process, giving up.";
#elif defined(OS_CHROMEOS)
// ChromeOS also can not fall back to software. There we will just
// keep retrying to make the gpu process forever. Good luck.
DLOG(ERROR) << "Gpu process is unstable and crashing repeatedly, if "
"you didn't notice already.";
#else
// The GPU process is too unstable to use. Disable it for current
// session.
GpuDataManagerImpl::GetInstance()->DisableHardwareAcceleration();
#endif
}
} else if (GpuDataManagerImpl::GetInstance()->SwiftShaderAllowed()) {
IncrementCrashCount(kForgiveGpuCrashMinutes,
&swiftshader_recent_crash_count_);
UMA_HISTOGRAM_EXACT_LINEAR(
kProcessLifetimeEventsSwiftShader,
DIED_FIRST_TIME + swiftshader_recent_crash_count_ - 1,
static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX));
if (swiftshader_recent_crash_count_ >= kGpuMaxCrashCount &&
!disable_crash_limit) {
// SwiftShader is too unstable to use. Disable it for current session.
GpuDataManagerImpl::GetInstance()->BlockSwiftShader();
}
} else {
IncrementCrashCount(kForgiveDisplayCompositorCrashMinutes,
&display_compositor_recent_crash_count_);
UMA_HISTOGRAM_EXACT_LINEAR(
kProcessLifetimeEventsDisplayCompositor,
DIED_FIRST_TIME + display_compositor_recent_crash_count_ - 1,
static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX));
if (display_compositor_recent_crash_count_ >= kGpuMaxCrashCount &&
!disable_crash_limit) {
// Something is very wrong and the GPU process keeps crashing with only
// the display compositor running. Kill the browser process to reset
// everything and attempt to improve stability.
LOG(FATAL) << "The display compositor is frequently crashing. Goodbye.";
}
}
if (!process_launched_ || kind_ != GPU_PROCESS_KIND_SANDBOXED)
return;
// Keep track of the total number of GPU crashes.
base::subtle::NoBarrier_AtomicIncrement(&gpu_crash_count_, 1);
int recent_crash_count = 0;
if (mode_ == gpu::GpuMode::HARDWARE_ACCELERATED) {
IncrementCrashCount(kForgiveGpuCrashMinutes,
&hardware_accelerated_recent_crash_count_);
UMA_HISTOGRAM_EXACT_LINEAR(
kProcessLifetimeEventsHardwareAccelerated,
DIED_FIRST_TIME + hardware_accelerated_recent_crash_count_ - 1,
static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX));
recent_crash_count = hardware_accelerated_recent_crash_count_;
} else if (mode_ == gpu::GpuMode::SWIFTSHADER) {
IncrementCrashCount(kForgiveGpuCrashMinutes,
&swiftshader_recent_crash_count_);
UMA_HISTOGRAM_EXACT_LINEAR(
kProcessLifetimeEventsSwiftShader,
DIED_FIRST_TIME + swiftshader_recent_crash_count_ - 1,
static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX));
recent_crash_count = swiftshader_recent_crash_count_;
} else if (mode_ == gpu::GpuMode::DISPLAY_COMPOSITOR) {
IncrementCrashCount(kForgiveDisplayCompositorCrashMinutes,
&display_compositor_recent_crash_count_);
UMA_HISTOGRAM_EXACT_LINEAR(
kProcessLifetimeEventsDisplayCompositor,
DIED_FIRST_TIME + display_compositor_recent_crash_count_ - 1,
static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX));
recent_crash_count = display_compositor_recent_crash_count_;
}
// GPU process initialization failed and fallback already happened.
if (status_ == FAILURE)
return;
bool disable_crash_limit = base::CommandLine::ForCurrentProcess()->HasSwitch(
switches::kDisableGpuProcessCrashLimit);
// GPU process crashed too many times, fallback on a different GPU process
// mode.
if (recent_crash_count >= kGpuFallbackCrashCount && !disable_crash_limit)
GpuDataManagerImpl::GetInstance()->FallBackToNextGpuMode();
}
std::string GpuProcessHost::GetShaderPrefixKey() {
......
......@@ -28,6 +28,7 @@
#include "gpu/command_buffer/common/constants.h"
#include "gpu/config/gpu_feature_info.h"
#include "gpu/config/gpu_info.h"
#include "gpu/config/gpu_mode.h"
#include "gpu/ipc/common/surface_handle.h"
#include "ipc/ipc_sender.h"
#include "mojo/public/cpp/bindings/binding.h"
......@@ -293,6 +294,8 @@ class GpuProcessHost : public BrowserChildProcessHostDelegate,
GpuProcessKind kind_;
gpu::GpuMode mode_ = gpu::GpuMode::UNKNOWN;
// Whether we actually launched a GPU process.
bool process_launched_;
......
......@@ -121,6 +121,7 @@ source_set("config_sources") {
"gpu_info_collector_linux.cc",
"gpu_info_collector_mac.mm",
"gpu_info_collector_win.cc",
"gpu_mode.h",
"gpu_preferences.cc",
"gpu_preferences.h",
"gpu_switches.cc",
......
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef GPU_CONFIG_GPU_MODE_H_
#define GPU_CONFIG_GPU_MODE_H_
namespace gpu {
// What the GPU process is running for.
enum class GpuMode {
// No mode has been determined yet.
UNKNOWN,
// The GPU process is running with hardware acceleration.
HARDWARE_ACCELERATED,
// The GPU process is running for SwiftShader WebGL.
SWIFTSHADER,
// The GPU process is running for the display compositor (OOP-D only).
DISPLAY_COMPOSITOR,
// The GPU process is disabled and won't start (not OOP-D only).
DISABLED,
};
} // namespace gpu
#endif // GPU_CONFIG_GPU_MODE_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment