Commit 5d7fb33e authored by kylechar, committed by Commit Bot

Add GpuMode and unify fallback logic.

This CL fixes an issue where if GPU process initialization fails and
then GPU process crashes we end up incrementing the wrong UMA histogram
and recent crash count. This happens because when the GPU process
initialization fails we do a GPU mode fallback, e.g. disable hardware
acceleration or SwiftShader, which modifies values in
GpuDataManagerImplPrivate. When the GPU process crashes, it checks the
values in GpuDataManagerImplPrivate that were already changed.

For example, if hardware acceleration was tried and GPU process
initialization fails then hardware acceleration will be disabled from
GpuProcessHost::DidFailInitialize(). When the GPU process crashes,
GpuProcessHost::RecordProcessCrash() will increment the SwiftShader UMA
and recent crash count instead of hardware accelerated UMA and crash
count.

Add GpuMode which tracks this information and have GpuProcessHost store
this on initialization. This way the same UMA histograms are incremented
on launch and crash. Also make sure that after DidFailInitialize(),
RecordProcessCrash() doesn't also trigger GPU mode fallback. Move the
fallback logic into GpuDataManagerImplPrivate::GpuModeFallback() to
ensure it's the same for all cases.

Change Chrome OS to also LOG(FATAL) if the GPU process crashes too many
times or fails to initialize. Chrome OS always crashed in this case; it
would just crash later in GpuProcessTransportFactory. Also increase the max
crash count for Chrome OS from 3 to 6 to match Android.

Bug: 852063
Cq-Include-Trybots: luci.chromium.try:android_optional_gpu_tests_rel;luci.chromium.try:linux_optional_gpu_tests_rel;luci.chromium.try:mac_optional_gpu_tests_rel;luci.chromium.try:win_optional_gpu_tests_rel
Change-Id: I6870d0d877cdc2766d54428f90517c34cbd54343
Reviewed-on: https://chromium-review.googlesource.com/1099721
Commit-Queue: kylechar <kylechar@chromium.org>
Reviewed-by: Zhenyao Mo <zmo@chromium.org>
Reviewed-by: Antoine Labour <piman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#567278}
parent 03ca85bd
...@@ -207,9 +207,14 @@ void GpuDataManagerImpl::NotifyGpuInfoUpdate() { ...@@ -207,9 +207,14 @@ void GpuDataManagerImpl::NotifyGpuInfoUpdate() {
private_->NotifyGpuInfoUpdate(); private_->NotifyGpuInfoUpdate();
} }
void GpuDataManagerImpl::OnGpuProcessInitFailure() { gpu::GpuMode GpuDataManagerImpl::GetGpuMode() const {
base::AutoLock auto_lock(lock_); base::AutoLock auto_lock(lock_);
private_->OnGpuProcessInitFailure(); return private_->GetGpuMode();
}
void GpuDataManagerImpl::FallBackToNextGpuMode() {
base::AutoLock auto_lock(lock_);
private_->FallBackToNextGpuMode();
} }
bool GpuDataManagerImpl::IsGpuProcessUsingHardwareGpu() const { bool GpuDataManagerImpl::IsGpuProcessUsingHardwareGpu() const {
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "gpu/config/gpu_control_list.h" #include "gpu/config/gpu_control_list.h"
#include "gpu/config/gpu_feature_info.h" #include "gpu/config/gpu_feature_info.h"
#include "gpu/config/gpu_info.h" #include "gpu/config/gpu_info.h"
#include "gpu/config/gpu_mode.h"
class GURL; class GURL;
...@@ -149,8 +150,14 @@ class CONTENT_EXPORT GpuDataManagerImpl : public GpuDataManager { ...@@ -149,8 +150,14 @@ class CONTENT_EXPORT GpuDataManagerImpl : public GpuDataManager {
// status update. // status update.
void NotifyGpuInfoUpdate(); void NotifyGpuInfoUpdate();
// Called when GPU process initialization failed. // Return mode describing what the GPU process will be launched to run.
void OnGpuProcessInitFailure(); gpu::GpuMode GetGpuMode() const;
// Called when GPU process initialization failed or the GPU process has
// crashed repeatedly. This will try to disable hardware acceleration and then
// SwiftShader WebGL. It will also crash the browser process as a last resort
// on Android and Chrome OS.
void FallBackToNextGpuMode();
void BlockSwiftShader(); void BlockSwiftShader();
bool SwiftShaderAllowed() const; bool SwiftShaderAllowed() const;
......
...@@ -869,21 +869,43 @@ bool GpuDataManagerImplPrivate::NeedsCompleteGpuInfoCollection() const { ...@@ -869,21 +869,43 @@ bool GpuDataManagerImplPrivate::NeedsCompleteGpuInfoCollection() const {
#endif #endif
} }
void GpuDataManagerImplPrivate::OnGpuProcessInitFailure() { gpu::GpuMode GpuDataManagerImplPrivate::GetGpuMode() const {
if (HardwareAccelerationEnabled()) {
return gpu::GpuMode::HARDWARE_ACCELERATED;
} else if (SwiftShaderAllowed()) {
return gpu::GpuMode::SWIFTSHADER;
} else if (base::FeatureList::IsEnabled(features::kVizDisplayCompositor)) {
return gpu::GpuMode::DISPLAY_COMPOSITOR;
} else {
return gpu::GpuMode::DISABLED;
}
}
void GpuDataManagerImplPrivate::FallBackToNextGpuMode() {
#if defined(OS_ANDROID) || defined(OS_CHROMEOS)
// Android and Chrome OS can't switch to software compositing. If the GPU
// process initialization fails or GPU process is too unstable then crash the
// browser process to reset everything.
LOG(FATAL) << "GPU process isn't usable. Goodbye.";
#else
// TODO(kylechar): Use GpuMode to store the current mode instead of
// multiple bools.
if (!card_disabled_) { if (!card_disabled_) {
DisableHardwareAcceleration(); DisableHardwareAcceleration();
return; } else if (SwiftShaderAllowed()) {
}
if (SwiftShaderAllowed()) {
BlockSwiftShader(); BlockSwiftShader();
return; } else if (base::FeatureList::IsEnabled(features::kVizDisplayCompositor)) {
} // The GPU process is frequently crashing with only the display compositor
if (!base::FeatureList::IsEnabled(features::kVizDisplayCompositor)) { // running. This should never happen so something is wrong. Crash the
// When Viz display compositor is not enabled, if GPU process fails to // browser process to reset everything.
// launch with hardware GPU, and then fails to launch with SwiftShader if LOG(FATAL) << "The display compositor is frequently crashing. Goodbye.";
// available, then GPU process should not launch again. } else {
// We are already at GpuMode::DISABLED. We shouldn't be launching the GPU
// process for it to fail.
NOTREACHED(); NOTREACHED();
} }
#endif
} }
} // namespace content } // namespace content
...@@ -99,7 +99,8 @@ class CONTENT_EXPORT GpuDataManagerImplPrivate { ...@@ -99,7 +99,8 @@ class CONTENT_EXPORT GpuDataManagerImplPrivate {
bool UpdateActiveGpu(uint32_t vendor_id, uint32_t device_id); bool UpdateActiveGpu(uint32_t vendor_id, uint32_t device_id);
void OnGpuProcessInitFailure(); gpu::GpuMode GetGpuMode() const;
void FallBackToNextGpuMode();
// Notify all observers whenever there is a GPU info update. // Notify all observers whenever there is a GPU info update.
void NotifyGpuInfoUpdate(); void NotifyGpuInfoUpdate();
......
...@@ -884,6 +884,9 @@ bool GpuProcessHost::Init() { ...@@ -884,6 +884,9 @@ bool GpuProcessHost::Init() {
process_->GetHost()->CreateChannelMojo(); process_->GetHost()->CreateChannelMojo();
mode_ = GpuDataManagerImpl::GetInstance()->GetGpuMode();
DCHECK_NE(mode_, gpu::GpuMode::DISABLED);
if (in_process_) { if (in_process_) {
DCHECK_CURRENTLY_ON(BrowserThread::IO); DCHECK_CURRENTLY_ON(BrowserThread::IO);
DCHECK(GetGpuMainThreadFactory()); DCHECK(GetGpuMainThreadFactory());
...@@ -1204,7 +1207,7 @@ void GpuProcessHost::DidFailInitialize() { ...@@ -1204,7 +1207,7 @@ void GpuProcessHost::DidFailInitialize() {
UMA_HISTOGRAM_BOOLEAN("GPU.GPUProcessInitialized", false); UMA_HISTOGRAM_BOOLEAN("GPU.GPUProcessInitialized", false);
status_ = FAILURE; status_ = FAILURE;
GpuDataManagerImpl* gpu_data_manager = GpuDataManagerImpl::GetInstance(); GpuDataManagerImpl* gpu_data_manager = GpuDataManagerImpl::GetInstance();
gpu_data_manager->OnGpuProcessInitFailure(); gpu_data_manager->FallBackToNextGpuMode();
RunRequestGPUInfoCallbacks(gpu_data_manager->GetGPUInfo()); RunRequestGPUInfoCallbacks(gpu_data_manager->GetGPUInfo());
} }
...@@ -1416,6 +1419,8 @@ bool GpuProcessHost::LaunchGpuProcess() { ...@@ -1416,6 +1419,8 @@ bool GpuProcessHost::LaunchGpuProcess() {
GetContentClient()->browser()->AppendExtraCommandLineSwitches( GetContentClient()->browser()->AppendExtraCommandLineSwitches(
cmd_line.get(), process_->GetData().id); cmd_line.get(), process_->GetData().id);
// TODO(kylechar): The command line flags added here should be based on
// |mode_|.
GpuDataManagerImpl::GetInstance()->AppendGpuCommandLine(cmd_line.get()); GpuDataManagerImpl::GetInstance()->AppendGpuCommandLine(cmd_line.get());
bool swiftshader_rendering = bool swiftshader_rendering =
(cmd_line->GetSwitchValueASCII(switches::kUseGL) == (cmd_line->GetSwitchValueASCII(switches::kUseGL) ==
...@@ -1438,14 +1443,13 @@ bool GpuProcessHost::LaunchGpuProcess() { ...@@ -1438,14 +1443,13 @@ bool GpuProcessHost::LaunchGpuProcess() {
process_launched_ = true; process_launched_ = true;
if (kind_ == GPU_PROCESS_KIND_SANDBOXED) { if (kind_ == GPU_PROCESS_KIND_SANDBOXED) {
auto* gpu_data_manager = GpuDataManagerImpl::GetInstance(); if (mode_ == gpu::GpuMode::HARDWARE_ACCELERATED) {
if (gpu_data_manager->HardwareAccelerationEnabled()) {
UMA_HISTOGRAM_ENUMERATION(kProcessLifetimeEventsHardwareAccelerated, UMA_HISTOGRAM_ENUMERATION(kProcessLifetimeEventsHardwareAccelerated,
LAUNCHED, GPU_PROCESS_LIFETIME_EVENT_MAX); LAUNCHED, GPU_PROCESS_LIFETIME_EVENT_MAX);
} else if (gpu_data_manager->SwiftShaderAllowed()) { } else if (mode_ == gpu::GpuMode::SWIFTSHADER) {
UMA_HISTOGRAM_ENUMERATION(kProcessLifetimeEventsSwiftShader, LAUNCHED, UMA_HISTOGRAM_ENUMERATION(kProcessLifetimeEventsSwiftShader, LAUNCHED,
GPU_PROCESS_LIFETIME_EVENT_MAX); GPU_PROCESS_LIFETIME_EVENT_MAX);
} else { } else if (mode_ == gpu::GpuMode::DISPLAY_COMPOSITOR) {
UMA_HISTOGRAM_ENUMERATION(kProcessLifetimeEventsDisplayCompositor, UMA_HISTOGRAM_ENUMERATION(kProcessLifetimeEventsDisplayCompositor,
LAUNCHED, GPU_PROCESS_LIFETIME_EVENT_MAX); LAUNCHED, GPU_PROCESS_LIFETIME_EVENT_MAX);
} }
...@@ -1495,83 +1499,64 @@ void GpuProcessHost::BlockLiveOffscreenContexts() { ...@@ -1495,83 +1499,64 @@ void GpuProcessHost::BlockLiveOffscreenContexts() {
} }
void GpuProcessHost::RecordProcessCrash() { void GpuProcessHost::RecordProcessCrash() {
#if !defined(OS_ANDROID) #if !defined(OS_ANDROID) && !defined(OS_CHROMEOS)
// Maximum number of times the GPU process is allowed to crash in a session. // Maximum number of times the GPU process can crash before we try something
// Once this limit is reached, any request to launch the GPU process will // different, like disabling hardware acceleration or all GL.
// fail. constexpr int kGpuFallbackCrashCount = 3;
const int kGpuMaxCrashCount = 3;
#else #else
// On android there is no way to recover without gpu, and the OS can kill the // Android and Chrome OS switch to software compositing and fallback crashes
// gpu process arbitrarily, so use a higher count to allow for that. // the browser process. For Android the OS can also kill the GPU process
const int kGpuMaxCrashCount = 6; // arbitrarily. Use a larger maximum crash count here.
constexpr int kGpuFallbackCrashCount = 6;
#endif #endif
bool disable_crash_limit = base::CommandLine::ForCurrentProcess()->HasSwitch(
switches::kDisableGpuProcessCrashLimit);
// Ending only acts as a failure if the GPU process was actually started and // Ending only acts as a failure if the GPU process was actually started and
// was intended for actual rendering (and not just checking caps or other // was intended for actual rendering (and not just checking caps or other
// options). // options).
if (process_launched_ && kind_ == GPU_PROCESS_KIND_SANDBOXED) { if (!process_launched_ || kind_ != GPU_PROCESS_KIND_SANDBOXED)
return;
// Keep track of the total number of GPU crashes. // Keep track of the total number of GPU crashes.
base::subtle::NoBarrier_AtomicIncrement(&gpu_crash_count_, 1); base::subtle::NoBarrier_AtomicIncrement(&gpu_crash_count_, 1);
if (GpuDataManagerImpl::GetInstance()->HardwareAccelerationEnabled()) { int recent_crash_count = 0;
if (mode_ == gpu::GpuMode::HARDWARE_ACCELERATED) {
IncrementCrashCount(kForgiveGpuCrashMinutes, IncrementCrashCount(kForgiveGpuCrashMinutes,
&hardware_accelerated_recent_crash_count_); &hardware_accelerated_recent_crash_count_);
UMA_HISTOGRAM_EXACT_LINEAR( UMA_HISTOGRAM_EXACT_LINEAR(
kProcessLifetimeEventsHardwareAccelerated, kProcessLifetimeEventsHardwareAccelerated,
DIED_FIRST_TIME + hardware_accelerated_recent_crash_count_ - 1, DIED_FIRST_TIME + hardware_accelerated_recent_crash_count_ - 1,
static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX)); static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX));
recent_crash_count = hardware_accelerated_recent_crash_count_;
if ((hardware_accelerated_recent_crash_count_ >= kGpuMaxCrashCount || } else if (mode_ == gpu::GpuMode::SWIFTSHADER) {
status_ == FAILURE) &&
!disable_crash_limit) {
#if defined(OS_ANDROID)
// Android can not fall back to software. If things are too unstable
// then we just crash chrome to reset everything. Sorry.
LOG(FATAL) << "Unable to start gpu process, giving up.";
#elif defined(OS_CHROMEOS)
// ChromeOS also can not fall back to software. There we will just
// keep retrying to make the gpu process forever. Good luck.
DLOG(ERROR) << "Gpu process is unstable and crashing repeatedly, if "
"you didn't notice already.";
#else
// The GPU process is too unstable to use. Disable it for current
// session.
GpuDataManagerImpl::GetInstance()->DisableHardwareAcceleration();
#endif
}
} else if (GpuDataManagerImpl::GetInstance()->SwiftShaderAllowed()) {
IncrementCrashCount(kForgiveGpuCrashMinutes, IncrementCrashCount(kForgiveGpuCrashMinutes,
&swiftshader_recent_crash_count_); &swiftshader_recent_crash_count_);
UMA_HISTOGRAM_EXACT_LINEAR( UMA_HISTOGRAM_EXACT_LINEAR(
kProcessLifetimeEventsSwiftShader, kProcessLifetimeEventsSwiftShader,
DIED_FIRST_TIME + swiftshader_recent_crash_count_ - 1, DIED_FIRST_TIME + swiftshader_recent_crash_count_ - 1,
static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX)); static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX));
recent_crash_count = swiftshader_recent_crash_count_;
if (swiftshader_recent_crash_count_ >= kGpuMaxCrashCount && } else if (mode_ == gpu::GpuMode::DISPLAY_COMPOSITOR) {
!disable_crash_limit) {
// SwiftShader is too unstable to use. Disable it for current session.
GpuDataManagerImpl::GetInstance()->BlockSwiftShader();
}
} else {
IncrementCrashCount(kForgiveDisplayCompositorCrashMinutes, IncrementCrashCount(kForgiveDisplayCompositorCrashMinutes,
&display_compositor_recent_crash_count_); &display_compositor_recent_crash_count_);
UMA_HISTOGRAM_EXACT_LINEAR( UMA_HISTOGRAM_EXACT_LINEAR(
kProcessLifetimeEventsDisplayCompositor, kProcessLifetimeEventsDisplayCompositor,
DIED_FIRST_TIME + display_compositor_recent_crash_count_ - 1, DIED_FIRST_TIME + display_compositor_recent_crash_count_ - 1,
static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX)); static_cast<int>(GPU_PROCESS_LIFETIME_EVENT_MAX));
recent_crash_count = display_compositor_recent_crash_count_;
if (display_compositor_recent_crash_count_ >= kGpuMaxCrashCount &&
!disable_crash_limit) {
// Something is very wrong and the GPU process keeps crashing with only
// the display compositor running. Kill the browser process to reset
// everything and attempt to improve stability.
LOG(FATAL) << "The display compositor is frequently crashing. Goodbye.";
}
}
} }
// GPU process initialization failed and fallback already happened.
if (status_ == FAILURE)
return;
bool disable_crash_limit = base::CommandLine::ForCurrentProcess()->HasSwitch(
switches::kDisableGpuProcessCrashLimit);
// GPU process crashed too many times, fallback on a different GPU process
// mode.
if (recent_crash_count >= kGpuFallbackCrashCount && !disable_crash_limit)
GpuDataManagerImpl::GetInstance()->FallBackToNextGpuMode();
} }
std::string GpuProcessHost::GetShaderPrefixKey() { std::string GpuProcessHost::GetShaderPrefixKey() {
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "gpu/command_buffer/common/constants.h" #include "gpu/command_buffer/common/constants.h"
#include "gpu/config/gpu_feature_info.h" #include "gpu/config/gpu_feature_info.h"
#include "gpu/config/gpu_info.h" #include "gpu/config/gpu_info.h"
#include "gpu/config/gpu_mode.h"
#include "gpu/ipc/common/surface_handle.h" #include "gpu/ipc/common/surface_handle.h"
#include "ipc/ipc_sender.h" #include "ipc/ipc_sender.h"
#include "mojo/public/cpp/bindings/binding.h" #include "mojo/public/cpp/bindings/binding.h"
...@@ -293,6 +294,8 @@ class GpuProcessHost : public BrowserChildProcessHostDelegate, ...@@ -293,6 +294,8 @@ class GpuProcessHost : public BrowserChildProcessHostDelegate,
GpuProcessKind kind_; GpuProcessKind kind_;
gpu::GpuMode mode_ = gpu::GpuMode::UNKNOWN;
// Whether we actually launched a GPU process. // Whether we actually launched a GPU process.
bool process_launched_; bool process_launched_;
......
...@@ -121,6 +121,7 @@ source_set("config_sources") { ...@@ -121,6 +121,7 @@ source_set("config_sources") {
"gpu_info_collector_linux.cc", "gpu_info_collector_linux.cc",
"gpu_info_collector_mac.mm", "gpu_info_collector_mac.mm",
"gpu_info_collector_win.cc", "gpu_info_collector_win.cc",
"gpu_mode.h",
"gpu_preferences.cc", "gpu_preferences.cc",
"gpu_preferences.h", "gpu_preferences.h",
"gpu_switches.cc", "gpu_switches.cc",
......
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef GPU_CONFIG_GPU_MODE_H_
#define GPU_CONFIG_GPU_MODE_H_
namespace gpu {

// Describes the purpose the GPU process is (or would be) running for.
// Enumerators rely on their implicit values, so their order must not change.
enum class GpuMode {
  // No mode has been determined yet.
  UNKNOWN,

  // The GPU process runs with hardware acceleration.
  HARDWARE_ACCELERATED,

  // The GPU process runs for SwiftShader WebGL.
  SWIFTSHADER,

  // The GPU process runs only for the display compositor (OOP-D only).
  DISPLAY_COMPOSITOR,

  // The GPU process is disabled and will not be started (not OOP-D only).
  DISABLED,
};

}  // namespace gpu
#endif // GPU_CONFIG_GPU_MODE_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment