Commit 4664a167 authored by kylechar's avatar kylechar Committed by Commit Bot

Change GPU process mojo error handler

The GPU process mojo error handler would cache the error string for 5
seconds and if a specific crash was triggered the error string would be
put in a crash key. This produced some unexpected error strings, see
https://crbug.com/1075495#c32, for deserialization errors on
viz.mojom.CompositorFrameSink interface between browser and GPU.

Change the error handler to immediately DumpWithoutCrashing() so we get
a stack trace during deserialization. Hopefully the stack trace will
provide some better clues as to what went wrong.

Bug: 1075495
Change-Id: Iaa9d6c0cddb43132603c5c0fffc1e9a76c745631
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2405413Reviewed-by: default avatarKen Rockot <rockot@google.com>
Reviewed-by: default avatarZhenyao Mo <zmo@chromium.org>
Commit-Queue: kylechar <kylechar@chromium.org>
Cr-Commit-Position: refs/heads/master@{#806878}
parent 62068812
...@@ -10,13 +10,9 @@ ...@@ -10,13 +10,9 @@
#include "base/bind.h" #include "base/bind.h"
#include "base/feature_list.h" #include "base/feature_list.h"
#include "base/message_loop/message_pump_type.h" #include "base/message_loop/message_pump_type.h"
#include "base/no_destructor.h"
#include "base/power_monitor/power_monitor.h" #include "base/power_monitor/power_monitor.h"
#include "base/power_monitor/power_monitor_source.h" #include "base/power_monitor/power_monitor_source.h"
#include "base/single_thread_task_runner.h" #include "base/single_thread_task_runner.h"
#include "base/synchronization/lock.h"
#include "base/task/thread_pool.h"
#include "base/time/time.h"
#include "base/trace_event/memory_dump_manager.h" #include "base/trace_event/memory_dump_manager.h"
#include "build/build_config.h" #include "build/build_config.h"
#include "components/ui_devtools/buildflags.h" #include "components/ui_devtools/buildflags.h"
...@@ -25,81 +21,12 @@ ...@@ -25,81 +21,12 @@
#include "gpu/ipc/service/gpu_init.h" #include "gpu/ipc/service/gpu_init.h"
#include "gpu/ipc/service/gpu_watchdog_thread.h" #include "gpu/ipc/service/gpu_watchdog_thread.h"
#include "media/gpu/buildflags.h" #include "media/gpu/buildflags.h"
#include "mojo/public/cpp/bindings/scoped_message_error_crash_key.h"
#include "mojo/public/cpp/system/functions.h"
#include "services/metrics/public/cpp/delegating_ukm_recorder.h" #include "services/metrics/public/cpp/delegating_ukm_recorder.h"
#include "services/metrics/public/cpp/mojo_ukm_recorder.h" #include "services/metrics/public/cpp/mojo_ukm_recorder.h"
#include "third_party/skia/include/core/SkFontLCDConfig.h" #include "third_party/skia/include/core/SkFontLCDConfig.h"
namespace { namespace {
// Singleton class to store error strings from global mojo error handler. It
// will store error strings for kTimeout seconds and can be used to set a crash
// key if the GPU process is going to crash due to a deserialization error.
// TODO(kylechar): This can be removed after tracking down all outstanding
// deserialization errors in messages sent from the browser to GPU on the viz
// message pipe.
class MojoErrorTracker {
public:
static constexpr base::TimeDelta kTimeout = base::TimeDelta::FromSeconds(5);
static MojoErrorTracker& Get() {
static base::NoDestructor<MojoErrorTracker> tracker;
return *tracker;
}
void OnError(const std::string& error) {
{
base::AutoLock locked(lock_);
error_ = error;
error_time_ = base::TimeTicks::Now();
}
// Once the error is old enough we will no longer use it in a crash key we
// can reset the string storage.
base::ThreadPool::PostDelayedTask(
FROM_HERE,
base::BindOnce(&MojoErrorTracker::Reset, base::Unretained(this)),
kTimeout);
}
// This will initialize the scoped crash key in |key| with the last mojo
// error message if the last mojo error happened within kTimeout seconds.
void MaybeSetCrashKeyWithRecentError(
base::Optional<mojo::debug::ScopedMessageErrorCrashKey>& key) {
base::AutoLock locked(lock_);
if (!HasErrorTimedOut())
key.emplace(error_);
}
private:
friend class base::NoDestructor<MojoErrorTracker>;
MojoErrorTracker() = default;
bool HasErrorTimedOut() const EXCLUSIVE_LOCKS_REQUIRED(lock_) {
return base::TimeTicks::Now() - error_time_ > kTimeout;
}
void Reset() {
base::AutoLock locked(lock_);
// If another mojo error happened since this task was scheduled we shouldn't
// reset the error string yet.
if (!HasErrorTimedOut())
return;
error_.clear();
error_.shrink_to_fit();
}
base::Lock lock_;
std::string error_ GUARDED_BY(lock_);
base::TimeTicks error_time_ GUARDED_BY(lock_);
};
// static
constexpr base::TimeDelta MojoErrorTracker::kTimeout;
std::unique_ptr<base::Thread> CreateAndStartIOThread() { std::unique_ptr<base::Thread> CreateAndStartIOThread() {
// TODO(sad): We do not need the IO thread once gpu has a separate process. // TODO(sad): We do not need the IO thread once gpu has a separate process.
// It should be possible to use |main_task_runner_| for doing IO tasks. // It should be possible to use |main_task_runner_| for doing IO tasks.
...@@ -136,12 +63,6 @@ VizMainImpl::VizMainImpl(Delegate* delegate, ...@@ -136,12 +63,6 @@ VizMainImpl::VizMainImpl(Delegate* delegate,
gpu_thread_task_runner_(base::ThreadTaskRunnerHandle::Get()) { gpu_thread_task_runner_(base::ThreadTaskRunnerHandle::Get()) {
DCHECK(gpu_init_); DCHECK(gpu_init_);
if (!gpu_init_->gpu_info().in_process_gpu) {
mojo::SetDefaultProcessErrorHandler(
base::BindRepeating(&MojoErrorTracker::OnError,
base::Unretained(&MojoErrorTracker::Get())));
}
// TODO(crbug.com/609317): Remove this when Mus Window Server and GPU are // TODO(crbug.com/609317): Remove this when Mus Window Server and GPU are
// split into separate processes. Until then this is necessary to be able to // split into separate processes. Until then this is necessary to be able to
// run Mushrome (chrome with mus) with Mus running in the browser process. // run Mushrome (chrome with mus) with Mus running in the browser process.
...@@ -317,16 +238,8 @@ void VizMainImpl::CreateFrameSinkManagerInternal( ...@@ -317,16 +238,8 @@ void VizMainImpl::CreateFrameSinkManagerInternal(
// FrameSinkManagerImpl, so just do a hard CHECK rather than crashing down the // FrameSinkManagerImpl, so just do a hard CHECK rather than crashing down the
// road so that all crash reports caused by this issue look the same and have // road so that all crash reports caused by this issue look the same and have
// the same signature. https://crbug.com/928845 // the same signature. https://crbug.com/928845
if (task_executor_) { CHECK(!task_executor_);
// If the global mojo error handler callback ran recently, we've cached the
// error string and will initialize |crash_key| to contain it before
// intentionally crashing. The deserialization error that caused the mojo
// error handler to run was probably, but not 100% guaranteed, the error
// that caused the main viz browser-to-GPU message pipe close.
base::Optional<mojo::debug::ScopedMessageErrorCrashKey> crash_key;
MojoErrorTracker::Get().MaybeSetCrashKeyWithRecentError(crash_key);
CHECK(!task_executor_);
}
task_executor_ = std::make_unique<gpu::GpuInProcessThreadService>( task_executor_ = std::make_unique<gpu::GpuInProcessThreadService>(
this, gpu_thread_task_runner_, gpu_service_->GetGpuScheduler(), this, gpu_thread_task_runner_, gpu_service_->GetGpuScheduler(),
gpu_service_->sync_point_manager(), gpu_service_->mailbox_manager(), gpu_service_->sync_point_manager(), gpu_service_->mailbox_manager(),
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "base/bind_helpers.h" #include "base/bind_helpers.h"
#include "base/callback_helpers.h" #include "base/callback_helpers.h"
#include "base/command_line.h" #include "base/command_line.h"
#include "base/debug/dump_without_crashing.h"
#include "base/memory/weak_ptr.h" #include "base/memory/weak_ptr.h"
#include "base/power_monitor/power_monitor.h" #include "base/power_monitor/power_monitor.h"
#include "base/power_monitor/power_monitor_device_source.h" #include "base/power_monitor/power_monitor_device_source.h"
...@@ -35,6 +36,8 @@ ...@@ -35,6 +36,8 @@
#include "mojo/public/cpp/bindings/binder_map.h" #include "mojo/public/cpp/bindings/binder_map.h"
#include "mojo/public/cpp/bindings/pending_receiver.h" #include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/pending_remote.h" #include "mojo/public/cpp/bindings/pending_remote.h"
#include "mojo/public/cpp/bindings/scoped_message_error_crash_key.h"
#include "mojo/public/cpp/system/functions.h"
#include "services/metrics/public/cpp/mojo_ukm_recorder.h" #include "services/metrics/public/cpp/mojo_ukm_recorder.h"
#include "services/metrics/public/mojom/ukm_interface.mojom.h" #include "services/metrics/public/mojom/ukm_interface.mojom.h"
#include "services/service_manager/public/cpp/binder_registry.h" #include "services/service_manager/public/cpp/binder_registry.h"
...@@ -49,6 +52,13 @@ ...@@ -49,6 +52,13 @@
namespace content { namespace content {
namespace { namespace {
// Called when the GPU process receives a bad IPC message.
void HandleBadMessage(const std::string& error) {
LOG(ERROR) << "Mojo error in GPU process: " << error;
mojo::debug::ScopedMessageErrorCrashKey crash_key_value(error);
base::debug::DumpWithoutCrashing();
}
ChildThreadImpl::Options GetOptions() { ChildThreadImpl::Options GetOptions() {
ChildThreadImpl::Options::Builder builder; ChildThreadImpl::Options::Builder builder;
...@@ -118,6 +128,9 @@ GpuChildThread::GpuChildThread(base::RepeatingClosure quit_closure, ...@@ -118,6 +128,9 @@ GpuChildThread::GpuChildThread(base::RepeatingClosure quit_closure,
GpuChildThread::~GpuChildThread() = default; GpuChildThread::~GpuChildThread() = default;
void GpuChildThread::Init(const base::Time& process_start_time) { void GpuChildThread::Init(const base::Time& process_start_time) {
if (!in_process_gpu())
mojo::SetDefaultProcessErrorHandler(base::BindRepeating(&HandleBadMessage));
viz_main_.gpu_service()->set_start_time(process_start_time); viz_main_.gpu_service()->set_start_time(process_start_time);
// When running in in-process mode, this has been set in the browser at // When running in in-process mode, this has been set in the browser at
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment