Commit ed480cdc authored by liberato@chromium.org's avatar liberato@chromium.org Committed by Commit Bot

Refactor D3D11VideoDecoder for threading.

This CL moves all work that runs on the video decoder's thread into
D3D11VideoDecoder, and all GPU main thread work into Impl.

It does not actually move decoding to a different thread, nor does
it remove all of the threading assumptions from the code.  For
example, D3D11PictureBuffers still assume that they're created and
destroyed on the GPU main thread.

Cq-Include-Trybots: luci.chromium.try:android_optional_gpu_tests_rel;luci.chromium.try:linux_optional_gpu_tests_rel;luci.chromium.try:mac_optional_gpu_tests_rel;luci.chromium.try:win_optional_gpu_tests_rel
Change-Id: I4ee52d84c77c57127d26803153c043959f7429ef
Reviewed-on: https://chromium-review.googlesource.com/1197165
Commit-Queue: Frank Liberato <liberato@chromium.org>
Reviewed-by: default avatarDan Sanders <sandersd@chromium.org>
Cr-Commit-Position: refs/heads/master@{#589224}
parent 889b4779
...@@ -38,7 +38,9 @@ D3D11PictureBuffer::D3D11PictureBuffer(GLenum target, ...@@ -38,7 +38,9 @@ D3D11PictureBuffer::D3D11PictureBuffer(GLenum target,
size_t level) size_t level)
: target_(target), size_(size), level_(level) {} : target_(target), size_(size), level_(level) {}
D3D11PictureBuffer::~D3D11PictureBuffer() {} D3D11PictureBuffer::~D3D11PictureBuffer() {
// TODO(liberato): post destruction of |gpu_resources_| to the gpu thread.
}
bool D3D11PictureBuffer::Init( bool D3D11PictureBuffer::Init(
base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb, base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb,
...@@ -71,6 +73,7 @@ bool D3D11PictureBuffer::Init( ...@@ -71,6 +73,7 @@ bool D3D11PictureBuffer::Init(
// device for decoding. Sharing seems not to work very well. Otherwise, we // device for decoding. Sharing seems not to work very well. Otherwise, we
// would create the texture with KEYED_MUTEX and NTHANDLE, then send along // would create the texture with KEYED_MUTEX and NTHANDLE, then send along
// a handle that we get from |texture| as an IDXGIResource1. // a handle that we get from |texture| as an IDXGIResource1.
// TODO(liberato): this should happen on the gpu thread.
gpu_resources_ = std::make_unique<GpuResources>(); gpu_resources_ = std::make_unique<GpuResources>();
if (!gpu_resources_->Init(std::move(get_stub_cb), level_, if (!gpu_resources_->Init(std::move(get_stub_cb), level_,
std::move(mailboxes), target_, size_, texture, std::move(mailboxes), target_, size_, texture,
......
...@@ -10,41 +10,16 @@ ...@@ -10,41 +10,16 @@
#include "base/callback.h" #include "base/callback.h"
#include "base/metrics/histogram_macros.h" #include "base/metrics/histogram_macros.h"
#include "media/base/bind_to_current_loop.h" #include "media/base/bind_to_current_loop.h"
#include "media/base/cdm_context.h"
#include "media/base/decoder_buffer.h" #include "media/base/decoder_buffer.h"
#include "media/base/media_log.h" #include "media/base/media_log.h"
#include "media/base/video_codecs.h" #include "media/base/video_codecs.h"
#include "media/base/video_decoder_config.h" #include "media/base/video_decoder_config.h"
#include "media/base/video_frame.h" #include "media/base/video_frame.h"
#include "media/base/video_util.h"
#include "media/gpu/windows/d3d11_picture_buffer.h"
#include "media/gpu/windows/d3d11_video_decoder_impl.h" #include "media/gpu/windows/d3d11_video_decoder_impl.h"
#include "ui/gl/gl_angle_util_win.h"
namespace {
// Check |weak_ptr| and run |cb| with |args| if it's non-null.
template <typename T, typename... Args>
void CallbackOnProperThread(base::WeakPtr<T> weak_ptr,
base::Callback<void(Args...)> cb,
Args... args) {
if (weak_ptr.get())
cb.Run(args...);
}
// Given a callback, |cb|, return another callback that will call |cb| after
// switching to the thread that BindToCurrent.... is called on. We will check
// |weak_ptr| on the current thread. This is different than just calling
// BindToCurrentLoop because we'll check the weak ptr. If |cb| is some method
// of |T|, then one can use BindToCurrentLoop directly. However, in our case,
// we have some unrelated callback that we'd like to call only if we haven't
// been destroyed yet. I suppose this could also just be a method:
// template<CB, ...> D3D11VideoDecoder::CallSomeCallback(CB, ...) that's bound
// via BindToCurrentLoop directly.
template <typename T, typename... Args>
base::Callback<void(Args...)> BindToCurrentThreadIfWeakPtr(
base::WeakPtr<T> weak_ptr,
base::Callback<void(Args...)> cb) {
return media::BindToCurrentLoop(
base::Bind(&CallbackOnProperThread<T, Args...>, weak_ptr, cb));
}
} // namespace
namespace media { namespace media {
...@@ -65,7 +40,8 @@ std::unique_ptr<VideoDecoder> D3D11VideoDecoder::Create( ...@@ -65,7 +40,8 @@ std::unique_ptr<VideoDecoder> D3D11VideoDecoder::Create(
new D3D11VideoDecoder(std::move(gpu_task_runner), std::move(media_log), new D3D11VideoDecoder(std::move(gpu_task_runner), std::move(media_log),
gpu_preferences, gpu_workarounds, gpu_preferences, gpu_workarounds,
std::make_unique<D3D11VideoDecoderImpl>( std::make_unique<D3D11VideoDecoderImpl>(
std::move(cloned_media_log), get_stub_cb))); std::move(cloned_media_log), get_stub_cb),
get_stub_cb));
} }
D3D11VideoDecoder::D3D11VideoDecoder( D3D11VideoDecoder::D3D11VideoDecoder(
...@@ -73,14 +49,18 @@ D3D11VideoDecoder::D3D11VideoDecoder( ...@@ -73,14 +49,18 @@ D3D11VideoDecoder::D3D11VideoDecoder(
std::unique_ptr<MediaLog> media_log, std::unique_ptr<MediaLog> media_log,
const gpu::GpuPreferences& gpu_preferences, const gpu::GpuPreferences& gpu_preferences,
const gpu::GpuDriverBugWorkarounds& gpu_workarounds, const gpu::GpuDriverBugWorkarounds& gpu_workarounds,
std::unique_ptr<D3D11VideoDecoderImpl> impl) std::unique_ptr<D3D11VideoDecoderImpl> impl,
base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb)
: media_log_(std::move(media_log)), : media_log_(std::move(media_log)),
impl_(std::move(impl)), impl_(std::move(impl)),
impl_task_runner_(std::move(gpu_task_runner)), impl_task_runner_(std::move(gpu_task_runner)),
gpu_preferences_(gpu_preferences), gpu_preferences_(gpu_preferences),
gpu_workarounds_(gpu_workarounds), gpu_workarounds_(gpu_workarounds),
create_device_func_(base::BindRepeating(D3D11CreateDevice)), create_device_func_(base::BindRepeating(D3D11CreateDevice)),
get_stub_cb_(get_stub_cb),
weak_factory_(this) { weak_factory_(this) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
impl_weak_ = impl_->GetWeakPtr(); impl_weak_ = impl_->GetWeakPtr();
} }
...@@ -88,6 +68,8 @@ D3D11VideoDecoder::~D3D11VideoDecoder() { ...@@ -88,6 +68,8 @@ D3D11VideoDecoder::~D3D11VideoDecoder() {
// Post destruction to the main thread. When this executes, it will also // Post destruction to the main thread. When this executes, it will also
// cancel pending callbacks into |impl_| via |impl_weak_|. Callbacks out // cancel pending callbacks into |impl_| via |impl_weak_|. Callbacks out
// from |impl_| will be cancelled by |weak_factory_| when we return. // from |impl_| will be cancelled by |weak_factory_| when we return.
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (impl_task_runner_->RunsTasksInCurrentSequence()) if (impl_task_runner_->RunsTasksInCurrentSequence())
impl_.reset(); impl_.reset();
else else
...@@ -105,15 +87,152 @@ void D3D11VideoDecoder::Initialize( ...@@ -105,15 +87,152 @@ void D3D11VideoDecoder::Initialize(
const InitCB& init_cb, const InitCB& init_cb,
const OutputCB& output_cb, const OutputCB& output_cb,
const WaitingForDecryptionKeyCB& waiting_for_decryption_key_cb) { const WaitingForDecryptionKeyCB& waiting_for_decryption_key_cb) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (!IsPotentiallySupported(config)) { if (!IsPotentiallySupported(config)) {
DVLOG(3) << "D3D11 video decoder not supported for the config."; DVLOG(3) << "D3D11 video decoder not supported for the config.";
init_cb.Run(false); init_cb.Run(false);
return; return;
} }
init_cb_ = init_cb;
output_cb_ = output_cb;
is_encrypted_ = config.is_encrypted();
D3D11VideoDecoderImpl::InitCB cb = base::BindOnce(
&D3D11VideoDecoder::OnGpuInitComplete, weak_factory_.GetWeakPtr());
D3D11VideoDecoderImpl::ReturnPictureBufferCB return_picture_buffer_cb =
base::BindRepeating(&D3D11VideoDecoder::ReceivePictureBufferFromClient,
weak_factory_.GetWeakPtr());
// Initialize the video decoder.
// Use the ANGLE device, rather than create our own. It would be nice if we
// could use our own device, and run on the mojo thread, but texture sharing
// seems to be difficult.
// TODO(liberato): take |device_| as input.
device_ = gl::QueryD3D11DeviceObjectFromANGLE();
device_->GetImmediateContext(device_context_.ReleaseAndGetAddressOf());
HRESULT hr;
// TODO(liberato): Handle cleanup better. Also consider being less chatty in
// the logs, since this will fall back.
hr = device_context_.CopyTo(video_context_.ReleaseAndGetAddressOf());
if (!SUCCEEDED(hr)) {
NotifyError("Failed to get device context");
return;
}
hr = device_.CopyTo(video_device_.ReleaseAndGetAddressOf());
if (!SUCCEEDED(hr)) {
NotifyError("Failed to get video device");
return;
}
GUID needed_guid;
memcpy(&needed_guid, &D3D11_DECODER_PROFILE_H264_VLD_NOFGT,
sizeof(needed_guid));
GUID decoder_guid = {};
{
// Enumerate supported video profiles and look for the H264 profile.
bool found = false;
UINT profile_count = video_device_->GetVideoDecoderProfileCount();
for (UINT profile_idx = 0; profile_idx < profile_count; profile_idx++) {
GUID profile_id = {};
hr = video_device_->GetVideoDecoderProfile(profile_idx, &profile_id);
if (SUCCEEDED(hr) && (profile_id == needed_guid)) {
decoder_guid = profile_id;
found = true;
break;
}
}
if (!found) {
NotifyError("Did not find a supported profile");
return;
}
}
// TODO(liberato): dxva does this. don't know if we need to.
Microsoft::WRL::ComPtr<ID3D11Multithread> multi_threaded;
hr = device_->QueryInterface(IID_PPV_ARGS(&multi_threaded));
if (!SUCCEEDED(hr)) {
NotifyError("Failed to query ID3D11Multithread");
return;
}
multi_threaded->SetMultithreadProtected(TRUE);
D3D11_VIDEO_DECODER_DESC desc = {};
desc.Guid = decoder_guid;
desc.SampleWidth = config.coded_size().width();
desc.SampleHeight = config.coded_size().height();
desc.OutputFormat = DXGI_FORMAT_NV12;
UINT config_count = 0;
hr = video_device_->GetVideoDecoderConfigCount(&desc, &config_count);
if (FAILED(hr) || config_count == 0) {
NotifyError("Failed to get video decoder config count");
return;
}
D3D11_VIDEO_DECODER_CONFIG dec_config = {};
bool found = false;
for (UINT i = 0; i < config_count; i++) {
hr = video_device_->GetVideoDecoderConfig(&desc, i, &dec_config);
if (FAILED(hr)) {
NotifyError("Failed to get decoder config");
return;
}
if (dec_config.ConfigBitstreamRaw == 2) {
found = true;
break;
}
}
if (!found) {
NotifyError("Failed to find decoder config");
return;
}
if (is_encrypted_)
dec_config.guidConfigBitstreamEncryption = D3D11_DECODER_ENCRYPTION_HW_CENC;
memcpy(&decoder_guid_, &decoder_guid, sizeof decoder_guid_);
Microsoft::WRL::ComPtr<ID3D11VideoDecoder> video_decoder;
hr = video_device_->CreateVideoDecoder(
&desc, &dec_config, video_decoder.ReleaseAndGetAddressOf());
if (!video_decoder.Get()) {
NotifyError("Failed to create a video decoder");
return;
}
CdmProxyContext* proxy_context = nullptr;
#if BUILDFLAG(ENABLE_LIBRARY_CDMS)
if (cdm_context)
proxy_context = cdm_context->GetCdmProxyContext();
#endif
accelerated_video_decoder_ = std::make_unique<H264Decoder>(
std::make_unique<D3D11H264Accelerator>(this, media_log_.get(),
proxy_context, video_decoder,
video_device_, video_context_),
config.color_space_info());
// |cdm_context| could be null for clear playback.
if (cdm_context) {
new_key_callback_registration_ =
cdm_context->RegisterNewKeyCB(base::BindRepeating(
&D3D11VideoDecoder::NotifyNewKey, weak_factory_.GetWeakPtr()));
}
// Initialize the gpu side. We wait until everything else is initialized,
// since we allow it to call us back re-entrantly to reduce latency. Note
// that if we're not on the same thread, then we should probably post the
// call earlier, since re-entrancy won't be an issue.
if (impl_task_runner_->RunsTasksInCurrentSequence()) { if (impl_task_runner_->RunsTasksInCurrentSequence()) {
impl_->Initialize(config, low_delay, cdm_context, init_cb, output_cb, impl_->Initialize(std::move(cb), std::move(return_picture_buffer_cb));
waiting_for_decryption_key_cb);
return; return;
} }
...@@ -121,56 +240,303 @@ void D3D11VideoDecoder::Initialize( ...@@ -121,56 +240,303 @@ void D3D11VideoDecoder::Initialize(
// the originals on some other thread. // the originals on some other thread.
// Important but subtle note: base::Bind will copy |config_| since it's a // Important but subtle note: base::Bind will copy |config_| since it's a
// const ref. // const ref.
// TODO(liberato): what's the lifetime of |cdm_context|?
impl_task_runner_->PostTask( impl_task_runner_->PostTask(
FROM_HERE, FROM_HERE,
base::BindOnce( base::BindOnce(&D3D11VideoDecoderImpl::Initialize, impl_weak_,
&VideoDecoder::Initialize, impl_weak_, config, low_delay, cdm_context, BindToCurrentLoop(std::move(cb)),
BindToCurrentThreadIfWeakPtr(weak_factory_.GetWeakPtr(), init_cb), BindToCurrentLoop(std::move(return_picture_buffer_cb))));
BindToCurrentThreadIfWeakPtr(weak_factory_.GetWeakPtr(), output_cb), }
BindToCurrentThreadIfWeakPtr(weak_factory_.GetWeakPtr(),
waiting_for_decryption_key_cb))); void D3D11VideoDecoder::ReceivePictureBufferFromClient(
scoped_refptr<D3D11PictureBuffer> buffer) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// We may decode into this buffer again.
// Note that |buffer| might no longer be in |picture_buffers_| if we've
// replaced them. That's okay.
buffer->set_in_client_use(false);
// Also re-start decoding in case it was waiting for more pictures.
DoDecode();
}
void D3D11VideoDecoder::OnGpuInitComplete(bool success) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (!init_cb_) {
// We already failed, so just do nothing.
return;
}
if (!success) {
NotifyError("Gpu init failed");
return;
}
state_ = State::kRunning;
std::move(init_cb_).Run(true);
} }
void D3D11VideoDecoder::Decode(scoped_refptr<DecoderBuffer> buffer, void D3D11VideoDecoder::Decode(scoped_refptr<DecoderBuffer> buffer,
const DecodeCB& decode_cb) { const DecodeCB& decode_cb) {
if (impl_task_runner_->RunsTasksInCurrentSequence()) { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
impl_->Decode(std::move(buffer), decode_cb);
if (state_ == State::kError) {
// TODO(liberato): consider posting, though it likely doesn't matter.
decode_cb.Run(DecodeStatus::DECODE_ERROR);
return; return;
} }
impl_task_runner_->PostTask( input_buffer_queue_.push_back(std::make_pair(std::move(buffer), decode_cb));
// Post, since we're not supposed to call back before this returns. It
// probably doesn't matter since we're in the gpu process anyway.
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE, FROM_HERE,
base::BindOnce( base::BindOnce(&D3D11VideoDecoder::DoDecode, weak_factory_.GetWeakPtr()));
&VideoDecoder::Decode, impl_weak_, std::move(buffer),
BindToCurrentThreadIfWeakPtr(weak_factory_.GetWeakPtr(), decode_cb)));
} }
void D3D11VideoDecoder::Reset(const base::Closure& closure) { void D3D11VideoDecoder::DoDecode() {
if (impl_task_runner_->RunsTasksInCurrentSequence()) { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
impl_->Reset(closure);
if (state_ != State::kRunning)
return; return;
if (!current_buffer_) {
if (input_buffer_queue_.empty()) {
return;
}
current_buffer_ = std::move(input_buffer_queue_.front().first);
current_decode_cb_ = input_buffer_queue_.front().second;
input_buffer_queue_.pop_front();
if (current_buffer_->end_of_stream()) {
// Flush, then signal the decode cb once all pictures have been output.
current_buffer_ = nullptr;
if (!accelerated_video_decoder_->Flush()) {
// This will also signal an error via |current_decode_cb_|.
NotifyError("Flush failed");
return;
}
// Pictures are output synchronously during Flush. Signal the decode
// cb now.
std::move(current_decode_cb_).Run(DecodeStatus::OK);
return;
}
// This must be after checking for EOS because there is no timestamp for an
// EOS buffer.
current_timestamp_ = current_buffer_->timestamp();
accelerated_video_decoder_->SetStream(-1, current_buffer_->data(),
current_buffer_->data_size(),
current_buffer_->decrypt_config());
} }
impl_task_runner_->PostTask( while (true) {
FROM_HERE, base::BindOnce(&VideoDecoder::Reset, impl_weak_, // If we transition to the error state, then stop here.
BindToCurrentThreadIfWeakPtr( if (state_ == State::kError)
weak_factory_.GetWeakPtr(), closure))); return;
media::AcceleratedVideoDecoder::DecodeResult result =
accelerated_video_decoder_->Decode();
// TODO(liberato): switch + class enum.
if (result == media::AcceleratedVideoDecoder::kRanOutOfStreamData) {
current_buffer_ = nullptr;
std::move(current_decode_cb_).Run(DecodeStatus::OK);
break;
} else if (result == media::AcceleratedVideoDecoder::kRanOutOfSurfaces) {
// At this point, we know the picture size.
// If we haven't allocated picture buffers yet, then allocate some now.
// Otherwise, stop here. We'll restart when a picture comes back.
if (picture_buffers_.size())
return;
CreatePictureBuffers();
} else if (result == media::AcceleratedVideoDecoder::kAllocateNewSurfaces) {
CreatePictureBuffers();
} else if (result == media::AcceleratedVideoDecoder::kTryAgain) {
state_ = State::kWaitingForNewKey;
// Note that another DoDecode() task would be posted in NotifyNewKey().
return;
} else {
LOG(ERROR) << "VDA Error " << result;
NotifyError("Accelerated decode failed");
return;
}
}
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::BindOnce(&D3D11VideoDecoder::DoDecode, weak_factory_.GetWeakPtr()));
}
void D3D11VideoDecoder::Reset(const base::RepeatingClosure& closure) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
current_buffer_ = nullptr;
if (current_decode_cb_)
std::move(current_decode_cb_).Run(DecodeStatus::ABORTED);
for (auto& queue_pair : input_buffer_queue_)
queue_pair.second.Run(DecodeStatus::ABORTED);
input_buffer_queue_.clear();
// TODO(liberato): how do we signal an error?
accelerated_video_decoder_->Reset();
closure.Run();
} }
bool D3D11VideoDecoder::NeedsBitstreamConversion() const { bool D3D11VideoDecoder::NeedsBitstreamConversion() const {
// Wrong thread, but it's okay. DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
return impl_->NeedsBitstreamConversion();
return true;
} }
bool D3D11VideoDecoder::CanReadWithoutStalling() const { bool D3D11VideoDecoder::CanReadWithoutStalling() const {
// Wrong thread, but it's okay. DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
return impl_->CanReadWithoutStalling();
return false;
} }
int D3D11VideoDecoder::GetMaxDecodeRequests() const { int D3D11VideoDecoder::GetMaxDecodeRequests() const {
// Wrong thread, but it's okay. DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
return impl_->GetMaxDecodeRequests();
return 4;
}
void D3D11VideoDecoder::CreatePictureBuffers() {
// TODO(liberato): When we run off the gpu main thread, this call will need
// to signal success / failure asynchronously. We'll need to transition into
// a "waiting for pictures" state, since D3D11PictureBuffer will post the gpu
// thread work.
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// TODO(liberato): what's the minimum that we need for the decoder?
// the VDA requests 20.
const int num_buffers = 20;
gfx::Size size = accelerated_video_decoder_->GetPicSize();
// Create an array of |num_buffers| elements to back the PictureBuffers.
D3D11_TEXTURE2D_DESC texture_desc = {};
texture_desc.Width = size.width();
texture_desc.Height = size.height();
texture_desc.MipLevels = 1;
texture_desc.ArraySize = num_buffers;
texture_desc.Format = DXGI_FORMAT_NV12;
texture_desc.SampleDesc.Count = 1;
texture_desc.Usage = D3D11_USAGE_DEFAULT;
texture_desc.BindFlags = D3D11_BIND_DECODER | D3D11_BIND_SHADER_RESOURCE;
texture_desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED;
if (is_encrypted_)
texture_desc.MiscFlags |= D3D11_RESOURCE_MISC_HW_PROTECTED;
Microsoft::WRL::ComPtr<ID3D11Texture2D> out_texture;
HRESULT hr = device_->CreateTexture2D(&texture_desc, nullptr,
out_texture.ReleaseAndGetAddressOf());
if (!SUCCEEDED(hr)) {
NotifyError("Failed to create a Texture2D for PictureBuffers");
return;
}
// Drop any old pictures.
for (auto& buffer : picture_buffers_)
DCHECK(!buffer->in_picture_use());
picture_buffers_.clear();
// Create each picture buffer.
const int textures_per_picture = 2; // From the VDA
for (size_t i = 0; i < num_buffers; i++) {
picture_buffers_.push_back(
new D3D11PictureBuffer(GL_TEXTURE_EXTERNAL_OES, size, i));
if (!picture_buffers_[i]->Init(get_stub_cb_, video_device_, out_texture,
decoder_guid_, textures_per_picture)) {
NotifyError("Unable to allocate PictureBuffer");
return;
}
}
}
D3D11PictureBuffer* D3D11VideoDecoder::GetPicture() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
for (auto& buffer : picture_buffers_) {
if (!buffer->in_client_use() && !buffer->in_picture_use()) {
buffer->timestamp_ = current_timestamp_;
return buffer.get();
}
}
return nullptr;
}
void D3D11VideoDecoder::OutputResult(D3D11PictureBuffer* buffer,
const VideoColorSpace& buffer_colorspace) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
buffer->set_in_client_use(true);
// Note: The pixel format doesn't matter.
gfx::Rect visible_rect(buffer->size());
// TODO(liberato): Pixel aspect ratio should come from the VideoDecoderConfig
// (except when it should come from the SPS).
// https://crbug.com/837337
double pixel_aspect_ratio = 1.0;
base::TimeDelta timestamp = buffer->timestamp_;
scoped_refptr<VideoFrame> frame = VideoFrame::WrapNativeTextures(
PIXEL_FORMAT_NV12, buffer->mailbox_holders(),
VideoFrame::ReleaseMailboxCB(), visible_rect.size(), visible_rect,
GetNaturalSize(visible_rect, pixel_aspect_ratio), timestamp);
// TODO(liberato): bind this to the gpu main thread.
frame->SetReleaseMailboxCB(media::BindToCurrentLoop(
base::BindOnce(&D3D11VideoDecoderImpl::OnMailboxReleased, impl_weak_,
scoped_refptr<D3D11PictureBuffer>(buffer))));
frame->metadata()->SetBoolean(VideoFrameMetadata::POWER_EFFICIENT, true);
// For NV12, overlay is allowed by default. If the decoder is going to support
// non-NV12 textures, then this may have to be conditionally set. Also note
// that ALLOW_OVERLAY is required for encrypted video path.
frame->metadata()->SetBoolean(VideoFrameMetadata::ALLOW_OVERLAY, true);
if (is_encrypted_) {
frame->metadata()->SetBoolean(VideoFrameMetadata::PROTECTED_VIDEO, true);
frame->metadata()->SetBoolean(VideoFrameMetadata::REQUIRE_OVERLAY, true);
}
frame->set_color_space(buffer_colorspace.ToGfxColorSpace());
output_cb_.Run(frame);
}
void D3D11VideoDecoder::NotifyNewKey() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (state_ != State::kWaitingForNewKey) {
// Note that this method may be called before DoDecode() because the key
// acquisition stack may be running independently of the media decoding
// stack. So if this isn't in kWaitingForNewKey state no "resuming" is
// required therefore no special action taken here.
return;
}
state_ = State::kRunning;
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::BindOnce(&D3D11VideoDecoder::DoDecode, weak_factory_.GetWeakPtr()));
}
void D3D11VideoDecoder::NotifyError(const char* reason) {
state_ = State::kError;
DLOG(ERROR) << reason;
if (media_log_) {
media_log_->AddEvent(media_log_->CreateStringEvent(
MediaLogEvent::MEDIA_ERROR_LOG_ENTRY, "error", reason));
}
if (init_cb_)
std::move(init_cb_).Run(false);
if (current_decode_cb_)
std::move(current_decode_cb_).Run(DecodeStatus::DECODE_ERROR);
for (auto& queue_pair : input_buffer_queue_)
queue_pair.second.Run(DecodeStatus::DECODE_ERROR);
} }
void D3D11VideoDecoder::SetCreateDeviceCallbackForTesting( void D3D11VideoDecoder::SetCreateDeviceCallbackForTesting(
...@@ -180,6 +546,8 @@ void D3D11VideoDecoder::SetCreateDeviceCallbackForTesting( ...@@ -180,6 +546,8 @@ void D3D11VideoDecoder::SetCreateDeviceCallbackForTesting(
void D3D11VideoDecoder::SetWasSupportedReason( void D3D11VideoDecoder::SetWasSupportedReason(
D3D11VideoNotSupportedReason enum_value) { D3D11VideoNotSupportedReason enum_value) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
UMA_HISTOGRAM_ENUMERATION("Media.D3D11.WasVideoSupported", enum_value); UMA_HISTOGRAM_ENUMERATION("Media.D3D11.WasVideoSupported", enum_value);
const char* reason = nullptr; const char* reason = nullptr;
...@@ -213,6 +581,8 @@ void D3D11VideoDecoder::SetWasSupportedReason( ...@@ -213,6 +581,8 @@ void D3D11VideoDecoder::SetWasSupportedReason(
bool D3D11VideoDecoder::IsPotentiallySupported( bool D3D11VideoDecoder::IsPotentiallySupported(
const VideoDecoderConfig& config) { const VideoDecoderConfig& config) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// TODO(liberato): All of this could be moved into MojoVideoDecoder, so that // TODO(liberato): All of this could be moved into MojoVideoDecoder, so that
// it could run on the client side and save the IPC hop. // it could run on the client side and save the IPC hop.
......
...@@ -10,29 +10,35 @@ ...@@ -10,29 +10,35 @@
#include "base/memory/ptr_util.h" #include "base/memory/ptr_util.h"
#include "base/memory/ref_counted.h" #include "base/memory/ref_counted.h"
#include "base/memory/weak_ptr.h" #include "base/memory/weak_ptr.h"
#include "base/sequence_checker.h"
#include "base/sequenced_task_runner.h" #include "base/sequenced_task_runner.h"
#include "base/single_thread_task_runner.h" #include "base/single_thread_task_runner.h"
#include "gpu/config/gpu_driver_bug_workarounds.h" #include "gpu/config/gpu_driver_bug_workarounds.h"
#include "gpu/config/gpu_preferences.h" #include "gpu/config/gpu_preferences.h"
#include "gpu/ipc/service/command_buffer_stub.h" #include "media/base/callback_registry.h"
#include "media/base/video_decoder.h" #include "media/base/video_decoder.h"
#include "media/gpu/media_gpu_export.h" #include "media/gpu/media_gpu_export.h"
#include "media/gpu/windows/d3d11_create_device_cb.h" #include "media/gpu/windows/d3d11_create_device_cb.h"
#include "media/gpu/windows/d3d11_h264_accelerator.h"
namespace gpu {
class CommandBufferStub;
} // namespace gpu
namespace media { namespace media {
class D3D11PictureBuffer;
class D3D11VideoDecoderImpl; class D3D11VideoDecoderImpl;
class D3D11VideoDecoderTest; class D3D11VideoDecoderTest;
class MediaLog; class MediaLog;
// Thread-hopping implementation of D3D11VideoDecoder. It's meant to run on // Video decoder that uses D3D11 directly. It is intended that this class will
// a random thread, and hop to the gpu main thread. It does this so that it // run the decoder on whatever thread it lives on. However, at the moment, it
// can use the D3D context etc. What should really happen is that we should // only works if it's on the gpu main thread.
// get (or share with other D3D11VideoDecoder instances) our own context, and class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder,
// just share the D3D texture with the main thread's context. However, for public D3D11VideoDecoderClient {
// now, it's easier to hop threads.
class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder {
public: public:
// |get_stub_cb| must be called from |gpu_task_runner|.
static std::unique_ptr<VideoDecoder> Create( static std::unique_ptr<VideoDecoder> Create(
scoped_refptr<base::SingleThreadTaskRunner> gpu_task_runner, scoped_refptr<base::SingleThreadTaskRunner> gpu_task_runner,
std::unique_ptr<MediaLog> media_log, std::unique_ptr<MediaLog> media_log,
...@@ -51,11 +57,16 @@ class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder { ...@@ -51,11 +57,16 @@ class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder {
const WaitingForDecryptionKeyCB& waiting_for_decryption_key_cb) override; const WaitingForDecryptionKeyCB& waiting_for_decryption_key_cb) override;
void Decode(scoped_refptr<DecoderBuffer> buffer, void Decode(scoped_refptr<DecoderBuffer> buffer,
const DecodeCB& decode_cb) override; const DecodeCB& decode_cb) override;
void Reset(const base::Closure& closure) override; void Reset(const base::RepeatingClosure& closure) override;
bool NeedsBitstreamConversion() const override; bool NeedsBitstreamConversion() const override;
bool CanReadWithoutStalling() const override; bool CanReadWithoutStalling() const override;
int GetMaxDecodeRequests() const override; int GetMaxDecodeRequests() const override;
// D3D11VideoDecoderClient implementation.
D3D11PictureBuffer* GetPicture() override;
void OutputResult(D3D11PictureBuffer* buffer,
const VideoColorSpace& buffer_colorspace) override;
// Return false |config| definitely isn't going to work, so that we can fail // Return false |config| definitely isn't going to work, so that we can fail
// init without bothering with a thread hop. // init without bothering with a thread hop.
bool IsPotentiallySupported(const VideoDecoderConfig& config); bool IsPotentiallySupported(const VideoDecoderConfig& config);
...@@ -72,11 +83,27 @@ class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder { ...@@ -72,11 +83,27 @@ class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder {
private: private:
friend class D3D11VideoDecoderTest; friend class D3D11VideoDecoderTest;
D3D11VideoDecoder(scoped_refptr<base::SingleThreadTaskRunner> gpu_task_runner, D3D11VideoDecoder(
std::unique_ptr<MediaLog> media_log, scoped_refptr<base::SingleThreadTaskRunner> gpu_task_runner,
const gpu::GpuPreferences& gpu_preferences, std::unique_ptr<MediaLog> media_log,
const gpu::GpuDriverBugWorkarounds& gpu_workarounds, const gpu::GpuPreferences& gpu_preferences,
std::unique_ptr<D3D11VideoDecoderImpl> impl); const gpu::GpuDriverBugWorkarounds& gpu_workarounds,
std::unique_ptr<D3D11VideoDecoderImpl> impl,
base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb);
// Receive |buffer|, that is now unused by the client.
void ReceivePictureBufferFromClient(scoped_refptr<D3D11PictureBuffer> buffer);
// Called when the gpu side of initialization is complete.
void OnGpuInitComplete(bool success);
// Run the decoder loop.
void DoDecode();
// Create new PictureBuffers. Currently, this completes synchronously, but
// really should have an async interface since it must do some work on the gpu
// main thread.
void CreatePictureBuffers();
enum class D3D11VideoNotSupportedReason { enum class D3D11VideoNotSupportedReason {
kVideoIsSupported = 0, kVideoIsSupported = 0,
...@@ -104,16 +131,39 @@ class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder { ...@@ -104,16 +131,39 @@ class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder {
std::unique_ptr<MediaLog> media_log_; std::unique_ptr<MediaLog> media_log_;
enum class State {
// Initializing resources required to create a codec.
kInitializing,
// Initialization has completed and we're running. This is the only state
// in which |codec_| might be non-null. If |codec_| is null, a codec
// creation is pending.
kRunning,
// The decoder cannot make progress because it doesn't have the key to
// decrypt the buffer. Waiting for a new key to be available.
// This should only be transitioned from kRunning, and should only
// transition to kRunning.
kWaitingForNewKey,
// A fatal error occurred. A terminal state.
kError,
};
// Record a UMA about why IsPotentiallySupported returned false, or that it // Record a UMA about why IsPotentiallySupported returned false, or that it
// returned true. Also will add a MediaLog entry, etc. // returned true. Also will add a MediaLog entry, etc.
void SetWasSupportedReason(D3D11VideoNotSupportedReason enum_value); void SetWasSupportedReason(D3D11VideoNotSupportedReason enum_value);
// Callback to notify that new usable key is available.
void NotifyNewKey();
// Enter the kError state. This will fail any pending |init_cb_| and / or
// pending decode as well.
void NotifyError(const char* reason);
// The implementation, which we trampoline to the impl thread. // The implementation, which we trampoline to the impl thread.
// This must be freed on the impl thread. // This must be freed on the impl thread.
std::unique_ptr<D3D11VideoDecoderImpl> impl_; std::unique_ptr<D3D11VideoDecoderImpl> impl_;
// Weak ptr to |impl_|, which we use for callbacks. // Weak ptr to |impl_|, which we use for callbacks.
base::WeakPtr<VideoDecoder> impl_weak_; base::WeakPtr<D3D11VideoDecoderImpl> impl_weak_;
// Task runner for |impl_|. This must be the GPU main thread. // Task runner for |impl_|. This must be the GPU main thread.
scoped_refptr<base::SequencedTaskRunner> impl_task_runner_; scoped_refptr<base::SequencedTaskRunner> impl_task_runner_;
...@@ -121,8 +171,44 @@ class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder { ...@@ -121,8 +171,44 @@ class MEDIA_GPU_EXPORT D3D11VideoDecoder : public VideoDecoder {
gpu::GpuPreferences gpu_preferences_; gpu::GpuPreferences gpu_preferences_;
gpu::GpuDriverBugWorkarounds gpu_workarounds_; gpu::GpuDriverBugWorkarounds gpu_workarounds_;
// During init, these will be set.
InitCB init_cb_;
OutputCB output_cb_;
bool is_encrypted_ = false;
D3D11CreateDeviceCB create_device_func_; D3D11CreateDeviceCB create_device_func_;
Microsoft::WRL::ComPtr<ID3D11Device> device_;
Microsoft::WRL::ComPtr<ID3D11DeviceContext> device_context_;
Microsoft::WRL::ComPtr<ID3D11VideoDevice> video_device_;
Microsoft::WRL::ComPtr<ID3D11VideoContext1> video_context_;
std::unique_ptr<AcceleratedVideoDecoder> accelerated_video_decoder_;
GUID decoder_guid_;
std::list<std::pair<scoped_refptr<DecoderBuffer>, DecodeCB>>
input_buffer_queue_;
scoped_refptr<DecoderBuffer> current_buffer_;
DecodeCB current_decode_cb_;
base::TimeDelta current_timestamp_;
// Callback registration to keep the new key callback registered.
std::unique_ptr<CallbackRegistration> new_key_callback_registration_;
// Must be called on the gpu main thread. So, don't call it from here, since
// we don't know what thread we're on.
base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb_;
// It would be nice to unique_ptr these, but we give a ref to the VideoFrame
// so that the texture is retained until the mailbox is opened.
std::vector<scoped_refptr<D3D11PictureBuffer>> picture_buffers_;
State state_ = State::kInitializing;
// Entire class should be single-sequence.
SEQUENCE_CHECKER(sequence_checker_);
base::WeakPtrFactory<D3D11VideoDecoder> weak_factory_; base::WeakPtrFactory<D3D11VideoDecoder> weak_factory_;
DISALLOW_COPY_AND_ASSIGN(D3D11VideoDecoder); DISALLOW_COPY_AND_ASSIGN(D3D11VideoDecoder);
......
...@@ -4,23 +4,11 @@ ...@@ -4,23 +4,11 @@
#include "media/gpu/windows/d3d11_video_decoder_impl.h" #include "media/gpu/windows/d3d11_video_decoder_impl.h"
#include <d3d11_4.h> #include "gpu/command_buffer/common/sync_token.h"
#include "base/threading/sequenced_task_runner_handle.h"
#include "gpu/command_buffer/service/mailbox_manager.h"
#include "gpu/command_buffer/service/scheduler.h" #include "gpu/command_buffer/service/scheduler.h"
#include "gpu/command_buffer/service/texture_manager.h"
#include "gpu/ipc/service/gpu_channel.h" #include "gpu/ipc/service/gpu_channel.h"
#include "media/base/bind_to_current_loop.h"
#include "media/base/cdm_context.h"
#include "media/base/decoder_buffer.h"
#include "media/base/media_log.h" #include "media/base/media_log.h"
#include "media/base/video_decoder_config.h"
#include "media/base/video_frame.h"
#include "media/base/video_util.h"
#include "media/gpu/windows/d3d11_picture_buffer.h" #include "media/gpu/windows/d3d11_picture_buffer.h"
#include "ui/gl/gl_angle_util_win.h"
#include "ui/gl/gl_bindings.h"
namespace media { namespace media {
...@@ -37,377 +25,48 @@ D3D11VideoDecoderImpl::D3D11VideoDecoderImpl( ...@@ -37,377 +25,48 @@ D3D11VideoDecoderImpl::D3D11VideoDecoderImpl(
base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb) base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb)
: media_log_(std::move(media_log)), : media_log_(std::move(media_log)),
get_stub_cb_(get_stub_cb), get_stub_cb_(get_stub_cb),
weak_factory_(this) {} weak_factory_(this) {
// May be called from any thread.
}
D3D11VideoDecoderImpl::~D3D11VideoDecoderImpl() { D3D11VideoDecoderImpl::~D3D11VideoDecoderImpl() {
// TODO(liberato): be sure to clear |picture_buffers_| on the main thread. DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
// For now, we always run on the main thread anyway.
if (stub_ && !wait_sequence_id_.is_null()) if (stub_ && !wait_sequence_id_.is_null())
stub_->channel()->scheduler()->DestroySequence(wait_sequence_id_); stub_->channel()->scheduler()->DestroySequence(wait_sequence_id_);
} }
std::string D3D11VideoDecoderImpl::GetDisplayName() const {
NOTREACHED() << "Nobody should ask D3D11VideoDecoderImpl for its name";
return "D3D11VideoDecoderImpl";
}
void D3D11VideoDecoderImpl::Initialize( void D3D11VideoDecoderImpl::Initialize(
const VideoDecoderConfig& config, InitCB init_cb,
bool low_delay, ReturnPictureBufferCB return_picture_buffer_cb) {
CdmContext* cdm_context, DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
const InitCB& init_cb,
const OutputCB& output_cb,
const WaitingForDecryptionKeyCB& waiting_for_decryption_key_cb) {
init_cb_ = init_cb;
output_cb_ = output_cb;
is_encrypted_ = config.is_encrypted();
stub_ = get_stub_cb_.Run(); stub_ = get_stub_cb_.Run();
if (!MakeContextCurrent(stub_)) { if (!MakeContextCurrent(stub_)) {
NotifyError("Failed to get decoder stub"); const char* reason = "Failed to get decoder stub";
DLOG(ERROR) << reason;
if (media_log_) {
media_log_->AddEvent(media_log_->CreateStringEvent(
MediaLogEvent::MEDIA_ERROR_LOG_ENTRY, "error", reason));
}
std::move(init_cb).Run(false);
return; return;
} }
return_picture_buffer_cb_ = std::move(return_picture_buffer_cb);
// TODO(liberato): see GpuVideoFrameFactory. // TODO(liberato): see GpuVideoFrameFactory.
// stub_->AddDestructionObserver(this); // stub_->AddDestructionObserver(this);
wait_sequence_id_ = stub_->channel()->scheduler()->CreateSequence( wait_sequence_id_ = stub_->channel()->scheduler()->CreateSequence(
gpu::SchedulingPriority::kNormal); gpu::SchedulingPriority::kNormal);
// Use the ANGLE device, rather than create our own. It would be nice if we std::move(init_cb).Run(true);
// could use our own device, and run on the mojo thread, but texture sharing
// seems to be difficult.
device_ = gl::QueryD3D11DeviceObjectFromANGLE();
device_->GetImmediateContext(device_context_.ReleaseAndGetAddressOf());
HRESULT hr;
// TODO(liberato): Handle cleanup better. Also consider being less chatty in
// the logs, since this will fall back.
hr = device_context_.CopyTo(video_context_.ReleaseAndGetAddressOf());
if (!SUCCEEDED(hr)) {
NotifyError("Failed to get device context");
return;
}
hr = device_.CopyTo(video_device_.ReleaseAndGetAddressOf());
if (!SUCCEEDED(hr)) {
NotifyError("Failed to get video device");
return;
}
GUID needed_guid;
memcpy(&needed_guid, &D3D11_DECODER_PROFILE_H264_VLD_NOFGT,
sizeof(needed_guid));
GUID decoder_guid = {};
{
// Enumerate supported video profiles and look for the H264 profile.
bool found = false;
UINT profile_count = video_device_->GetVideoDecoderProfileCount();
for (UINT profile_idx = 0; profile_idx < profile_count; profile_idx++) {
GUID profile_id = {};
hr = video_device_->GetVideoDecoderProfile(profile_idx, &profile_id);
if (SUCCEEDED(hr) && (profile_id == needed_guid)) {
decoder_guid = profile_id;
found = true;
break;
}
}
if (!found) {
NotifyError("Did not find a supported profile");
return;
}
}
// TODO(liberato): dxva does this. don't know if we need to.
Microsoft::WRL::ComPtr<ID3D11Multithread> multi_threaded;
hr = device_->QueryInterface(IID_PPV_ARGS(&multi_threaded));
if (!SUCCEEDED(hr)) {
NotifyError("Failed to query ID3D11Multithread");
return;
}
multi_threaded->SetMultithreadProtected(TRUE);
D3D11_VIDEO_DECODER_DESC desc = {};
desc.Guid = decoder_guid;
desc.SampleWidth = config.coded_size().width();
desc.SampleHeight = config.coded_size().height();
desc.OutputFormat = DXGI_FORMAT_NV12;
UINT config_count = 0;
hr = video_device_->GetVideoDecoderConfigCount(&desc, &config_count);
if (FAILED(hr) || config_count == 0) {
NotifyError("Failed to get video decoder config count");
return;
}
D3D11_VIDEO_DECODER_CONFIG dec_config = {};
bool found = false;
for (UINT i = 0; i < config_count; i++) {
hr = video_device_->GetVideoDecoderConfig(&desc, i, &dec_config);
if (FAILED(hr)) {
NotifyError("Failed to get decoder config");
return;
}
if (dec_config.ConfigBitstreamRaw == 2) {
found = true;
break;
}
}
if (!found) {
NotifyError("Failed to find decoder config");
return;
}
if (is_encrypted_)
dec_config.guidConfigBitstreamEncryption = D3D11_DECODER_ENCRYPTION_HW_CENC;
memcpy(&decoder_guid_, &decoder_guid, sizeof decoder_guid_);
Microsoft::WRL::ComPtr<ID3D11VideoDecoder> video_decoder;
hr = video_device_->CreateVideoDecoder(
&desc, &dec_config, video_decoder.ReleaseAndGetAddressOf());
if (!video_decoder.Get()) {
NotifyError("Failed to create a video decoder");
return;
}
CdmProxyContext* proxy_context = nullptr;
#if BUILDFLAG(ENABLE_LIBRARY_CDMS)
if (cdm_context)
proxy_context = cdm_context->GetCdmProxyContext();
#endif
accelerated_video_decoder_ = std::make_unique<H264Decoder>(
std::make_unique<D3D11H264Accelerator>(this, media_log_.get(),
proxy_context, video_decoder,
video_device_, video_context_),
config.color_space_info());
// |cdm_context| could be null for clear playback.
if (cdm_context) {
new_key_callback_registration_ =
cdm_context->RegisterNewKeyCB(base::BindRepeating(
&D3D11VideoDecoderImpl::NotifyNewKey, weak_factory_.GetWeakPtr()));
}
state_ = State::kRunning;
std::move(init_cb_).Run(true);
}
void D3D11VideoDecoderImpl::Decode(scoped_refptr<DecoderBuffer> buffer,
const DecodeCB& decode_cb) {
if (state_ == State::kError) {
// TODO(liberato): consider posting, though it likely doesn't matter.
decode_cb.Run(DecodeStatus::DECODE_ERROR);
return;
}
input_buffer_queue_.push_back(std::make_pair(std::move(buffer), decode_cb));
// Post, since we're not supposed to call back before this returns. It
// probably doesn't matter since we're in the gpu process anyway.
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::Bind(&D3D11VideoDecoderImpl::DoDecode, weak_factory_.GetWeakPtr()));
}
void D3D11VideoDecoderImpl::DoDecode() {
if (state_ != State::kRunning)
return;
if (!current_buffer_) {
if (input_buffer_queue_.empty()) {
return;
}
current_buffer_ = std::move(input_buffer_queue_.front().first);
current_decode_cb_ = input_buffer_queue_.front().second;
input_buffer_queue_.pop_front();
if (current_buffer_->end_of_stream()) {
// Flush, then signal the decode cb once all pictures have been output.
current_buffer_ = nullptr;
if (!accelerated_video_decoder_->Flush()) {
// This will also signal error |current_decode_cb_|.
NotifyError("Flush failed");
return;
}
// Pictures out output synchronously during Flush. Signal the decode
// cb now.
std::move(current_decode_cb_).Run(DecodeStatus::OK);
return;
}
// This must be after checking for EOS because there is no timestamp for an
// EOS buffer.
current_timestamp_ = current_buffer_->timestamp();
accelerated_video_decoder_->SetStream(-1, current_buffer_->data(),
current_buffer_->data_size(),
current_buffer_->decrypt_config());
}
while (true) {
// If we transition to the error state, then stop here.
if (state_ == State::kError)
return;
media::AcceleratedVideoDecoder::DecodeResult result =
accelerated_video_decoder_->Decode();
// TODO(liberato): switch + class enum.
if (result == media::AcceleratedVideoDecoder::kRanOutOfStreamData) {
current_buffer_ = nullptr;
std::move(current_decode_cb_).Run(DecodeStatus::OK);
break;
} else if (result == media::AcceleratedVideoDecoder::kRanOutOfSurfaces) {
// At this point, we know the picture size.
// If we haven't allocated picture buffers yet, then allocate some now.
// Otherwise, stop here. We'll restart when a picture comes back.
if (picture_buffers_.size())
return;
CreatePictureBuffers();
} else if (result == media::AcceleratedVideoDecoder::kAllocateNewSurfaces) {
CreatePictureBuffers();
} else if (result == media::AcceleratedVideoDecoder::kTryAgain) {
state_ = State::kWaitingForNewKey;
// Note that another DoDecode() task would be posted in NotifyNewKey().
return;
} else {
LOG(ERROR) << "VDA Error " << result;
NotifyError("Accelerated decode failed");
return;
}
}
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::Bind(&D3D11VideoDecoderImpl::DoDecode, weak_factory_.GetWeakPtr()));
}
void D3D11VideoDecoderImpl::Reset(const base::Closure& closure) {
current_buffer_ = nullptr;
if (current_decode_cb_)
std::move(current_decode_cb_).Run(DecodeStatus::ABORTED);
for (auto& queue_pair : input_buffer_queue_)
queue_pair.second.Run(DecodeStatus::ABORTED);
input_buffer_queue_.clear();
// TODO(liberato): how do we signal an error?
accelerated_video_decoder_->Reset();
closure.Run();
}
bool D3D11VideoDecoderImpl::NeedsBitstreamConversion() const {
// This is called from multiple threads.
return true;
}
bool D3D11VideoDecoderImpl::CanReadWithoutStalling() const {
// This is called from multiple threads.
return false;
}
int D3D11VideoDecoderImpl::GetMaxDecodeRequests() const {
// This is called from multiple threads.
return 4;
}
void D3D11VideoDecoderImpl::CreatePictureBuffers() {
// TODO(liberato): what's the minimum that we need for the decoder?
// the VDA requests 20.
const int num_buffers = 20;
gfx::Size size = accelerated_video_decoder_->GetPicSize();
// Create an array of |num_buffers| elements to back the PictureBuffers.
D3D11_TEXTURE2D_DESC texture_desc = {};
texture_desc.Width = size.width();
texture_desc.Height = size.height();
texture_desc.MipLevels = 1;
texture_desc.ArraySize = num_buffers;
texture_desc.Format = DXGI_FORMAT_NV12;
texture_desc.SampleDesc.Count = 1;
texture_desc.Usage = D3D11_USAGE_DEFAULT;
texture_desc.BindFlags = D3D11_BIND_DECODER | D3D11_BIND_SHADER_RESOURCE;
texture_desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED;
if (is_encrypted_)
texture_desc.MiscFlags |= D3D11_RESOURCE_MISC_HW_PROTECTED;
Microsoft::WRL::ComPtr<ID3D11Texture2D> out_texture;
HRESULT hr = device_->CreateTexture2D(&texture_desc, nullptr,
out_texture.ReleaseAndGetAddressOf());
if (!SUCCEEDED(hr)) {
NotifyError("Failed to create a Texture2D for PictureBuffers");
return;
}
// Drop any old pictures.
for (auto& buffer : picture_buffers_)
DCHECK(!buffer->in_picture_use());
picture_buffers_.clear();
// Create each picture buffer.
const int textures_per_picture = 2; // From the VDA
for (size_t i = 0; i < num_buffers; i++) {
picture_buffers_.push_back(
new D3D11PictureBuffer(GL_TEXTURE_EXTERNAL_OES, size, i));
if (!picture_buffers_[i]->Init(get_stub_cb_, video_device_, out_texture,
decoder_guid_, textures_per_picture)) {
NotifyError("Unable to allocate PictureBuffer");
return;
}
}
}
D3D11PictureBuffer* D3D11VideoDecoderImpl::GetPicture() {
for (auto& buffer : picture_buffers_) {
if (!buffer->in_client_use() && !buffer->in_picture_use()) {
buffer->timestamp_ = current_timestamp_;
return buffer.get();
}
}
return nullptr;
}
void D3D11VideoDecoderImpl::OutputResult(
D3D11PictureBuffer* buffer,
const VideoColorSpace& buffer_colorspace) {
buffer->set_in_client_use(true);
// Note: The pixel format doesn't matter.
gfx::Rect visible_rect(buffer->size());
// TODO(liberato): Pixel aspect ratio should come from the VideoDecoderConfig
// (except when it should come from the SPS).
// https://crbug.com/837337
double pixel_aspect_ratio = 1.0;
base::TimeDelta timestamp = buffer->timestamp_;
scoped_refptr<VideoFrame> frame = VideoFrame::WrapNativeTextures(
PIXEL_FORMAT_NV12, buffer->mailbox_holders(),
VideoFrame::ReleaseMailboxCB(), visible_rect.size(), visible_rect,
GetNaturalSize(visible_rect, pixel_aspect_ratio), timestamp);
frame->SetReleaseMailboxCB(media::BindToCurrentLoop(base::BindOnce(
&D3D11VideoDecoderImpl::OnMailboxReleased, weak_factory_.GetWeakPtr(),
scoped_refptr<D3D11PictureBuffer>(buffer))));
frame->metadata()->SetBoolean(VideoFrameMetadata::POWER_EFFICIENT, true);
// For NV12, overlay is allowed by default. If the decoder is going to support
// non-NV12 textures, then this may have to be conditionally set. Also note
// that ALLOW_OVERLAY is required for encrypted video path.
frame->metadata()->SetBoolean(VideoFrameMetadata::ALLOW_OVERLAY, true);
if (is_encrypted_) {
frame->metadata()->SetBoolean(VideoFrameMetadata::PROTECTED_VIDEO, true);
frame->metadata()->SetBoolean(VideoFrameMetadata::REQUIRE_OVERLAY, true);
}
frame->set_color_space(buffer_colorspace.ToGfxColorSpace());
output_cb_.Run(frame);
} }
void D3D11VideoDecoderImpl::OnMailboxReleased( void D3D11VideoDecoderImpl::OnMailboxReleased(
scoped_refptr<D3D11PictureBuffer> buffer, scoped_refptr<D3D11PictureBuffer> buffer,
const gpu::SyncToken& sync_token) { const gpu::SyncToken& sync_token) {
// Note that |buffer| might no longer be in |picture_buffers_| if we've DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
// replaced them. That's okay.
stub_->channel()->scheduler()->ScheduleTask(gpu::Scheduler::Task( stub_->channel()->scheduler()->ScheduleTask(gpu::Scheduler::Task(
wait_sequence_id_, wait_sequence_id_,
...@@ -418,52 +77,14 @@ void D3D11VideoDecoderImpl::OnMailboxReleased( ...@@ -418,52 +77,14 @@ void D3D11VideoDecoderImpl::OnMailboxReleased(
void D3D11VideoDecoderImpl::OnSyncTokenReleased( void D3D11VideoDecoderImpl::OnSyncTokenReleased(
scoped_refptr<D3D11PictureBuffer> buffer) { scoped_refptr<D3D11PictureBuffer> buffer) {
// Note that |buffer| might no longer be in |picture_buffers_|. DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
buffer->set_in_client_use(false);
// Also re-start decoding in case it was waiting for more pictures. return_picture_buffer_cb_.Run(std::move(buffer));
// TODO(liberato): there might be something pending already. we should
// probably check.
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::BindOnce(&D3D11VideoDecoderImpl::DoDecode, GetWeakPtr()));
} }
base::WeakPtr<D3D11VideoDecoderImpl> D3D11VideoDecoderImpl::GetWeakPtr() { base::WeakPtr<D3D11VideoDecoderImpl> D3D11VideoDecoderImpl::GetWeakPtr() {
// May be called from any thread.
return weak_factory_.GetWeakPtr(); return weak_factory_.GetWeakPtr();
} }
void D3D11VideoDecoderImpl::NotifyNewKey() {
if (state_ != State::kWaitingForNewKey) {
// Note that this method may be called before DoDecode() because the key
// acquisition stack may be running independently of the media decoding
// stack. So if this isn't in kWaitingForNewKey state no "resuming" is
// required therefore no special action taken here.
return;
}
state_ = State::kRunning;
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE, base::BindOnce(&D3D11VideoDecoderImpl::DoDecode,
weak_factory_.GetWeakPtr()));
}
void D3D11VideoDecoderImpl::NotifyError(const char* reason) {
state_ = State::kError;
DLOG(ERROR) << reason;
if (media_log_) {
media_log_->AddEvent(media_log_->CreateStringEvent(
MediaLogEvent::MEDIA_ERROR_LOG_ENTRY, "error", reason));
}
if (init_cb_)
std::move(init_cb_).Run(false);
if (current_decode_cb_)
std::move(current_decode_cb_).Run(DecodeStatus::DECODE_ERROR);
for (auto& queue_pair : input_buffer_queue_)
queue_pair.second.Run(DecodeStatus::DECODE_ERROR);
}
} // namespace media } // namespace media
...@@ -13,122 +13,71 @@ ...@@ -13,122 +13,71 @@
#include <string> #include <string>
#include <tuple> #include <tuple>
#include "base/callback.h"
#include "base/memory/ref_counted.h" #include "base/memory/ref_counted.h"
#include "base/memory/weak_ptr.h" #include "base/memory/weak_ptr.h"
#include "base/threading/thread_checker.h"
#include "gpu/command_buffer/service/sequence_id.h" #include "gpu/command_buffer/service/sequence_id.h"
#include "gpu/ipc/service/command_buffer_stub.h"
#include "media/base/callback_registry.h"
#include "media/base/video_decoder.h"
#include "media/base/video_decoder_config.h"
#include "media/gpu/gles2_decoder_helper.h"
#include "media/gpu/media_gpu_export.h" #include "media/gpu/media_gpu_export.h"
#include "media/gpu/windows/d3d11_h264_accelerator.h"
#include "media/gpu/windows/output_with_release_mailbox_cb.h" namespace gpu {
class CommandBufferStub;
struct SyncToken;
} // namespace gpu
namespace media { namespace media {
class MediaLog; class MediaLog;
class D3D11PictureBuffer;
class MEDIA_GPU_EXPORT D3D11VideoDecoderImpl : public VideoDecoder, // Does the gpu main thread work for D3D11VideoDecoder. Except as noted, this
public D3D11VideoDecoderClient { // class lives on the GPU main thread.
// TODO(liberato): Rename this class as a follow-on to this refactor.
class MEDIA_GPU_EXPORT D3D11VideoDecoderImpl {
public: public:
// May be constructed on any thread.
explicit D3D11VideoDecoderImpl( explicit D3D11VideoDecoderImpl(
std::unique_ptr<MediaLog> media_log, std::unique_ptr<MediaLog> media_log,
base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb); base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb);
~D3D11VideoDecoderImpl() override; virtual ~D3D11VideoDecoderImpl();
// VideoDecoder implementation:
std::string GetDisplayName() const override;
void Initialize(
const VideoDecoderConfig& config,
bool low_delay,
CdmContext* cdm_context,
const InitCB& init_cb,
const OutputCB& output_cb,
const WaitingForDecryptionKeyCB& waiting_for_decryption_key_cb) override;
void Decode(scoped_refptr<DecoderBuffer> buffer,
const DecodeCB& decode_cb) override;
void Reset(const base::Closure& closure) override;
bool NeedsBitstreamConversion() const override;
bool CanReadWithoutStalling() const override;
int GetMaxDecodeRequests() const override;
// D3D11VideoDecoderClient implementation.
D3D11PictureBuffer* GetPicture() override;
void OutputResult(D3D11PictureBuffer* buffer,
const VideoColorSpace& buffer_colorspace) override;
// Return a weak ptr, since D3D11VideoDecoder constructs callbacks for us. using InitCB = base::OnceCallback<void(bool success)>;
base::WeakPtr<D3D11VideoDecoderImpl> GetWeakPtr();
private: // Returns a picture buffer that's no longer in use by the client.
enum class State { using ReturnPictureBufferCB =
// Initializing resources required to create a codec. base::RepeatingCallback<void(scoped_refptr<D3D11PictureBuffer>)>;
kInitializing,
// Initialization has completed and we're running. This is the only state // We will call back |init_cb| with the init status. |try_decoding_cb| should
// in which |codec_| might be non-null. If |codec_| is null, a codec // try to re-start decoding. We'll call this when we do something that might
// creation is pending. // allow decoding to make progress, such as reclaim a picture buffer.
kRunning, virtual void Initialize(InitCB init_cb,
// The decoder cannot make progress because it doesn't have the key to ReturnPictureBufferCB return_picture_buffer_cb);
// decrypt the buffer. Waiting for a new key to be available.
// This should only be transitioned from kRunning, and should only
// transition to kRunning.
kWaitingForNewKey,
// A fatal error occurred. A terminal state.
kError,
};
void DoDecode();
void CreatePictureBuffers();
// Called when the VideoFrame that uses |buffer| is freed.
void OnMailboxReleased(scoped_refptr<D3D11PictureBuffer> buffer, void OnMailboxReleased(scoped_refptr<D3D11PictureBuffer> buffer,
const gpu::SyncToken& sync_token); const gpu::SyncToken& sync_token);
void OnSyncTokenReleased(scoped_refptr<D3D11PictureBuffer> buffer);
// Callback to notify that new usable key is available. // Return a weak ptr, since D3D11VideoDecoder constructs callbacks for us.
void NotifyNewKey(); // May be called from any thread.
base::WeakPtr<D3D11VideoDecoderImpl> GetWeakPtr();
// Enter the kError state. This will fail any pending |init_cb_| and / or private:
// pending decode as well. void OnSyncTokenReleased(scoped_refptr<D3D11PictureBuffer> buffer);
void NotifyError(const char* reason);
std::unique_ptr<MediaLog> media_log_; std::unique_ptr<MediaLog> media_log_;
base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb_; base::RepeatingCallback<gpu::CommandBufferStub*()> get_stub_cb_;
gpu::CommandBufferStub* stub_ = nullptr; gpu::CommandBufferStub* stub_ = nullptr;
Microsoft::WRL::ComPtr<ID3D11Device> device_;
Microsoft::WRL::ComPtr<ID3D11DeviceContext> device_context_;
Microsoft::WRL::ComPtr<ID3D11VideoDevice> video_device_;
Microsoft::WRL::ComPtr<ID3D11VideoContext1> video_context_;
std::unique_ptr<AcceleratedVideoDecoder> accelerated_video_decoder_;
GUID decoder_guid_;
std::list<std::pair<scoped_refptr<DecoderBuffer>, DecodeCB>>
input_buffer_queue_;
scoped_refptr<DecoderBuffer> current_buffer_;
DecodeCB current_decode_cb_;
base::TimeDelta current_timestamp_;
// During init, these will be set.
InitCB init_cb_;
OutputCB output_cb_;
bool is_encrypted_ = false;
// It would be nice to unique_ptr these, but we give a ref to the VideoFrame
// so that the texture is retained until the mailbox is opened.
std::vector<scoped_refptr<D3D11PictureBuffer>> picture_buffers_;
State state_ = State::kInitializing;
// Callback registration to keep the new key callback registered.
std::unique_ptr<CallbackRegistration> new_key_callback_registration_;
// Wait sequence for sync points. // Wait sequence for sync points.
gpu::SequenceId wait_sequence_id_; gpu::SequenceId wait_sequence_id_;
// Called when we get a picture buffer back from the client.
ReturnPictureBufferCB return_picture_buffer_cb_;
// Has thread affinity -- must be run on the gpu main thread.
THREAD_CHECKER(thread_checker_);
base::WeakPtrFactory<D3D11VideoDecoderImpl> weak_factory_; base::WeakPtrFactory<D3D11VideoDecoderImpl> weak_factory_;
DISALLOW_COPY_AND_ASSIGN(D3D11VideoDecoderImpl); DISALLOW_COPY_AND_ASSIGN(D3D11VideoDecoderImpl);
......
...@@ -36,19 +36,12 @@ class MockD3D11VideoDecoderImpl : public D3D11VideoDecoderImpl { ...@@ -36,19 +36,12 @@ class MockD3D11VideoDecoderImpl : public D3D11VideoDecoderImpl {
nullptr, nullptr,
base::RepeatingCallback<gpu::CommandBufferStub*()>()) {} base::RepeatingCallback<gpu::CommandBufferStub*()>()) {}
MOCK_METHOD6( void Initialize(InitCB init_cb,
Initialize, ReturnPictureBufferCB return_picture_buffer_cb) override {
void(const VideoDecoderConfig& config, MockInitialize();
bool low_delay, }
CdmContext* cdm_context,
const InitCB& init_cb, MOCK_METHOD0(MockInitialize, void());
const OutputCB& output_cb,
const WaitingForDecryptionKeyCB& waiting_for_decryption_key_cb));
MOCK_METHOD2(Decode,
void(scoped_refptr<DecoderBuffer> buffer,
const DecodeCB& decode_cb));
MOCK_METHOD1(Reset, void(const base::RepeatingClosure& closure));
}; };
class D3D11VideoDecoderTest : public ::testing::Test { class D3D11VideoDecoderTest : public ::testing::Test {
...@@ -79,7 +72,9 @@ class D3D11VideoDecoderTest : public ::testing::Test { ...@@ -79,7 +72,9 @@ class D3D11VideoDecoderTest : public ::testing::Test {
decoder_ = base::WrapUnique<VideoDecoder>( decoder_ = base::WrapUnique<VideoDecoder>(
d3d11_decoder_raw_ = new D3D11VideoDecoder( d3d11_decoder_raw_ = new D3D11VideoDecoder(
gpu_task_runner_, nullptr /* MediaLog */, gpu_preferences_, gpu_task_runner_, nullptr /* MediaLog */, gpu_preferences_,
gpu_workarounds_, std::move(impl))); gpu_workarounds_, std::move(impl),
base::BindRepeating(
[]() -> gpu::CommandBufferStub* { return nullptr; })));
d3d11_decoder_raw_->SetCreateDeviceCallbackForTesting( d3d11_decoder_raw_->SetCreateDeviceCallbackForTesting(
base::BindRepeating(&D3D11CreateDeviceMock::Create, base::BindRepeating(&D3D11CreateDeviceMock::Create,
base::Unretained(&create_device_mock_))); base::Unretained(&create_device_mock_)));
...@@ -102,7 +97,7 @@ class D3D11VideoDecoderTest : public ::testing::Test { ...@@ -102,7 +97,7 @@ class D3D11VideoDecoderTest : public ::testing::Test {
if (expectation == kExpectSuccess) { if (expectation == kExpectSuccess) {
EXPECT_CALL(*this, MockInitCB(_)).Times(0); EXPECT_CALL(*this, MockInitCB(_)).Times(0);
EXPECT_CALL(*impl_, Initialize(_, low_delay, cdm_context, _, _, _)); EXPECT_CALL(*impl_, MockInitialize());
} else { } else {
EXPECT_CALL(*this, MockInitCB(false)); EXPECT_CALL(*this, MockInitCB(false));
} }
...@@ -164,7 +159,10 @@ TEST_F(D3D11VideoDecoderTest, SupportsH264) { ...@@ -164,7 +159,10 @@ TEST_F(D3D11VideoDecoderTest, SupportsH264) {
CreateDecoder(); CreateDecoder();
// Make sure that we're testing H264. // Make sure that we're testing H264.
ASSERT_EQ(supported_config_.profile(), H264PROFILE_MAIN); ASSERT_EQ(supported_config_.profile(), H264PROFILE_MAIN);
InitializeDecoder(supported_config_, kExpectSuccess); // We do not actually try to initialize the decoder, since we don't mock
// out enough of D3D for that to work. Instead, we just check that
// IsPotentiallySupported is correct.
EXPECT_TRUE(d3d11_decoder_raw_->IsPotentiallySupported(supported_config_));
} }
TEST_F(D3D11VideoDecoderTest, DoesNotSupportVP8) { TEST_F(D3D11VideoDecoderTest, DoesNotSupportVP8) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment