Commit 477a706a authored by Miguel Casas, committed by Commit Bot

RELAND2: Vaapi decode: split |decoder_|'s GetRequiredNumOfPictures()

This CL is a smart rebase-and-fix of the CLs below: it introduces
a new flag |use_reduced_number_of_allocations_| to enable the new
working mode described below while temporarily circumventing the
GtsExoPlayerTestCases failures (b/121169667 and b/121003733). This
new flag is false when |output_mode_| is IMPORT, so all ARC++ cases
should keep working because that path is left untouched.
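
For reference, a minimal sketch of the gating logic (the real check lives in
VaapiVideoDecodeAccelerator::Initialize(), see the diff below; the stand-in
enum and free function here are hypothetical):

  // Stand-in for VideoDecodeAccelerator::Config::OutputMode.
  enum class OutputMode { ALLOCATE, IMPORT };

  // Mirrors the condition in Initialize(): the reduced-allocation path is
  // only taken when the VDA allocates its own output textures, i.e. it is
  // never taken for IMPORT (ARC++) configurations.
  bool UseReducedNumberOfAllocations(bool decode_using_client_picture_buffers,
                                     OutputMode output_mode) {
    return !decode_using_client_picture_buffers &&
           output_mode == OutputMode::ALLOCATE;
  }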

On the other hand, an overview of the memory savings can be found at https://goo.gl/3PaMiA.

Original CL description -----------------------------------------------
Vaapi decode: split |decoder_|'s GetRequiredNumOfPictures()

This CL reduces the number of PictureBuffers that the |client_| is asked
to allocate when we are not |decode_using_client_picture_buffers_|.
Instead, it "splits" the requested allocations into
- the actually needed PictureBuffers (A)
- the codec's requested reference frames (B) (a new method
  GetNumReferenceFrames() is added to AcceleratedVideoDecoder for this).

This split saves a significant amount of memory, since we allocate A+B
buffers instead of 2*(A+B). (B is 5, and A is 4 for VP8 and 4-12 for
H264/VP9.)
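
To make the A/B accounting concrete, here is a rough back-of-the-envelope
sketch for VP8 only; it assumes limits::kMaxVideoFrames == 4 (that constant is
not part of this CL, so treat it as an assumption) and takes the other values
from the vp8_decoder.cc hunk below:

  #include <cstddef>

  constexpr size_t kMaxVideoFrames = 4;      // assumed limits::kMaxVideoFrames
  constexpr size_t kVP8NumFramesActive = 4;  // from vp8_decoder.cc below

  // B: VASurfaces kept for reference frames, i.e. GetNumReferenceFrames().
  constexpr size_t kRefFrames = kVP8NumFramesActive + 1;  // 5
  // GetRequiredNumOfPictures() with the new kPicsInPipeline (+1).
  constexpr size_t kRequiredPics =
      kVP8NumFramesActive + (kMaxVideoFrames + 1);        // 9
  // A: PictureBuffers actually requested from |client_| after the split.
  constexpr size_t kClientPics = kRequiredPics - kRefFrames;  // 4

  // Before: kPicsInPipeline was +2, and one VASurface was created per
  // requested PictureBuffer, i.e. roughly 2*(A+B) allocations in total.
  constexpr size_t kOldRequiredPics =
      kVP8NumFramesActive + (kMaxVideoFrames + 2);        // 10
  constexpr size_t kOldTotal = 2 * kOldRequiredPics;      // 20 buffers
  // After: A client PictureBuffers plus B internal VASurfaces.
  constexpr size_t kNewTotal = kClientPics + kRefFrames;  // 9 buffers
  static_assert(kNewTotal < kOldTotal,
                "the split roughly halves the allocations");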

Test: crosvideo changing resolutions for each codec, v_d_a_unittest
on nocturne (KBL) and caroline (SKL).

Bug: 912295

Reviewed-on: https://chromium-review.googlesource.com/c/1363807
Reviewed-by: Hirokazu Honda <hiroh@chromium.org>
Commit-Queue: Miguel Casas <mcasas@chromium.org>
Cr-Original-Original-Commit-Position: refs/heads/master@{#615571}
Reviewed-on: https://chromium-review.googlesource.com/c/1379274
Cr-Original-Commit-Position: refs/heads/master@{#617366}
Change-Id: Ibf9a1455f8df4d52b77aee8e01f15c02878947ae
Reviewed-on: https://chromium-review.googlesource.com/c/1387391
Cr-Commit-Position: refs/heads/master@{#620025}
parent 5e0d370c
@@ -66,11 +66,13 @@ class MEDIA_GPU_EXPORT AcceleratedVideoDecoder {
   // we need a new set of them, or when an error occurs.
   virtual DecodeResult Decode() WARN_UNUSED_RESULT = 0;
 
-  // Return dimensions/required number of output surfaces that client should
-  // be ready to provide for the decoder to function properly.
-  // To be used after Decode() returns kAllocateNewSurfaces.
+  // Return dimensions/required number of pictures that client should be ready
+  // to provide for the decoder to function properly (of which up to
+  // GetNumReferenceFrames() might be needed for internal decoding). To be used
+  // after Decode() returns kAllocateNewSurfaces.
   virtual gfx::Size GetPicSize() const = 0;
   virtual size_t GetRequiredNumOfPictures() const = 0;
+  virtual size_t GetNumReferenceFrames() const = 0;
 
   // About 3 secs for 30 fps video. When the new sized keyframe is missed, the
   // decoder cannot decode the frame. The number of frames are skipped until
...
@@ -1444,7 +1444,17 @@ gfx::Size H264Decoder::GetPicSize() const {
 }
 
 size_t H264Decoder::GetRequiredNumOfPictures() const {
-  return dpb_.max_num_pics() + kPicsInPipeline;
+  constexpr size_t kPicsInPipeline = limits::kMaxVideoFrames + 1;
+  return GetNumReferenceFrames() + kPicsInPipeline;
+}
+
+size_t H264Decoder::GetNumReferenceFrames() const {
+  // Use the maximum number of pictures in the Decoded Picture Buffer plus one
+  // for the one being currently egressed.
+  // Another +1 is experimentally needed for high-to-high resolution changes.
+  // TODO(mcasas): Figure out why +2 instead of +1, see crbug.com/909926 and
+  // http://crrev.com/c/1363807/9/media/gpu/h264_decoder.cc#1449.
+  return dpb_.max_num_pics() + 2;
 }
 
 // static
...
@@ -168,6 +168,7 @@ class MEDIA_GPU_EXPORT H264Decoder : public AcceleratedVideoDecoder {
   DecodeResult Decode() override WARN_UNUSED_RESULT;
   gfx::Size GetPicSize() const override;
   size_t GetRequiredNumOfPictures() const override;
+  size_t GetNumReferenceFrames() const override;
 
   // Return true if we need to start a new picture.
   static bool IsNewPrimaryCodedPicture(const H264Picture* curr_pic,
@@ -182,17 +183,6 @@ class MEDIA_GPU_EXPORT H264Decoder : public AcceleratedVideoDecoder {
                                        H264Picture* pic);
 
  private:
-  // We need to keep at most kDPBMaxSize pictures in DPB for
-  // reference/to display later and an additional one for the one currently
-  // being decoded. We also ask for some additional ones since VDA needs
-  // to accumulate a few ready-to-output pictures before it actually starts
-  // displaying and giving them back. +2 instead of +1 because of subjective
-  // smoothness improvement during testing.
-  enum {
-    kPicsInPipeline = limits::kMaxVideoFrames + 2,
-    kMaxNumReqPictures = H264DPB::kDPBMaxSize + kPicsInPipeline,
-  };
-
   // Internal state of the decoder.
   enum State {
     // After initialization, need an SPS.
...
@@ -16,6 +16,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/metrics/histogram_macros.h"
+#include "base/numerics/ranges.h"
 #include "base/stl_util.h"
 #include "base/strings/string_util.h"
 #include "base/synchronization/waitable_event.h"
@@ -174,11 +175,14 @@ VaapiVideoDecodeAccelerator::VaapiVideoDecodeAccelerator(
       vaapi_picture_factory_(new VaapiPictureFactory()),
       surfaces_available_(&lock_),
       decode_using_client_picture_buffers_(false),
+      use_reduced_number_of_allocations_(false),
       task_runner_(base::ThreadTaskRunnerHandle::Get()),
       decoder_thread_("VaapiDecoderThread"),
       finish_flush_pending_(false),
       awaiting_va_surfaces_recycle_(false),
       requested_num_pics_(0),
+      requested_num_reference_frames_(0),
+      previously_requested_num_reference_frames_(0),
       profile_(VIDEO_CODEC_PROFILE_UNKNOWN),
       make_context_current_cb_(make_context_current_cb),
       bind_image_cb_(bind_image_cb),
@@ -250,6 +254,10 @@ bool VaapiVideoDecodeAccelerator::Initialize(const Config& config,
   output_mode_ = config.output_mode;
   decode_using_client_picture_buffers_ =
       ShouldDecodeOnclientPictureBuffers(output_mode_, profile_);
+  use_reduced_number_of_allocations_ =
+      !decode_using_client_picture_buffers_ &&
+      output_mode_ == Config::OutputMode::ALLOCATE;
+  previously_requested_num_reference_frames_ = 0;
 
   return true;
 }
@@ -478,7 +486,8 @@ void VaapiVideoDecodeAccelerator::DecodeTask() {
           FROM_HERE,
           base::Bind(&VaapiVideoDecodeAccelerator::InitiateSurfaceSetChange,
                      weak_this_, decoder_->GetRequiredNumOfPictures(),
-                     decoder_->GetPicSize()));
+                     decoder_->GetPicSize(),
+                     decoder_->GetNumReferenceFrames()));
       // We'll get rescheduled once ProvidePictureBuffers() finishes.
       return;
@@ -515,23 +524,37 @@ void VaapiVideoDecodeAccelerator::DecodeTask() {
   }
 }
 
-void VaapiVideoDecodeAccelerator::InitiateSurfaceSetChange(size_t num_pics,
-                                                           gfx::Size size) {
+void VaapiVideoDecodeAccelerator::InitiateSurfaceSetChange(
+    size_t num_pics,
+    gfx::Size size,
+    size_t num_reference_frames) {
   DCHECK(task_runner_->BelongsToCurrentThread());
   DCHECK(!awaiting_va_surfaces_recycle_);
+  DCHECK_GT(num_pics, num_reference_frames);
 
   // At this point decoder has stopped running and has already posted onto our
   // loop any remaining output request callbacks, which executed before we got
-  // here. Some of them might have been pended though, because we might not
-  // have had enough TFPictures to output surfaces to. Initiate a wait cycle,
+  // here. Some of them might have been pended though, because we might not have
+  // had enough PictureBuffers to output surfaces to. Initiate a wait cycle,
   // which will wait for client to return enough PictureBuffers to us, so that
   // we can finish all pending output callbacks, releasing associated surfaces.
-  VLOGF(2) << "Initiating surface set change";
   awaiting_va_surfaces_recycle_ = true;
-
-  requested_num_pics_ = num_pics;
   requested_pic_size_ = size;
 
+  // If we can |use_reduced_number_of_allocations_|, split the requested
+  // |num_pics| between VA reference frames and client PictureBuffers proper.
+  if (use_reduced_number_of_allocations_)
+    requested_num_reference_frames_ = num_reference_frames;
+  else
+    requested_num_reference_frames_ = 0;
+
+  requested_num_pics_ = num_pics - requested_num_reference_frames_;
+
+  VLOGF(2) << " |requested_num_pics_| = " << requested_num_pics_
+           << "; |requested_num_reference_frames_| = "
+           << requested_num_reference_frames_;
+
   TryFinishSurfaceSetChange();
 }
@@ -542,14 +565,19 @@ void VaapiVideoDecodeAccelerator::TryFinishSurfaceSetChange() {
     return;
 
   base::AutoLock auto_lock(lock_);
+  const size_t expected_max_available_va_surfaces =
+      use_reduced_number_of_allocations_
+          ? previously_requested_num_reference_frames_
+          : pictures_.size();
   if (!pending_output_cbs_.empty() ||
-      pictures_.size() != available_va_surfaces_.size()) {
-    // Either:
-    // 1. Not all pending pending output callbacks have been executed yet.
-    //    Wait for the client to return enough pictures and retry later.
-    // 2. The above happened and all surface release callbacks have been posted
-    //    as the result, but not all have executed yet. Post ourselves after them
-    //    to let them release surfaces.
+      expected_max_available_va_surfaces != available_va_surfaces_.size()) {
+    // If we're here the stream resolution has changed; we need to wait until:
+    // - all |pending_output_cbs_| have been executed
+    // - all VASurfaces are back to |available_va_surfaces_|; we can't use
+    //   |requested_num_reference_frames_| for comparison, since it might have
+    //   changed in the previous call to InitiateSurfaceSetChange(), so we use
+    //   |previously_requested_num_reference_frames_| instead.
     DVLOGF(2) << "Awaiting pending output/surface release callbacks to finish";
     task_runner_->PostTask(
         FROM_HERE,
@@ -558,6 +586,8 @@ void VaapiVideoDecodeAccelerator::TryFinishSurfaceSetChange() {
     return;
   }
 
+  previously_requested_num_reference_frames_ = requested_num_reference_frames_;
+
   // All surfaces released, destroy them and dismiss all PictureBuffers.
   awaiting_va_surfaces_recycle_ = false;
   available_va_surfaces_.clear();
@@ -581,6 +611,7 @@ void VaapiVideoDecodeAccelerator::TryFinishSurfaceSetChange() {
       base::BindOnce(&Client::ProvidePictureBuffers, client_,
                      requested_num_pics_, format, 1, requested_pic_size_,
                      vaapi_picture_factory_->GetGLTextureTarget()));
+  // |client_| may respond via AssignPictureBuffers().
 }
 
 void VaapiVideoDecodeAccelerator::Decode(
@@ -675,14 +706,19 @@ void VaapiVideoDecodeAccelerator::AssignPictureBuffers(
     RETURN_AND_NOTIFY_ON_FAILURE(
         vaapi_wrapper_->CreateContext(va_format, requested_pic_size_),
         "Failed creating VA Context", PLATFORM_FAILURE, );
+    DCHECK_EQ(va_surface_ids.size(), buffers.size());
   } else {
+    const size_t requested_num_surfaces = use_reduced_number_of_allocations_
+                                              ? requested_num_reference_frames_
+                                              : pictures_.size();
+    CHECK_NE(requested_num_surfaces, 0u);
     va_surface_ids.clear();
-    RETURN_AND_NOTIFY_ON_FAILURE(
-        vaapi_wrapper_->CreateContextAndSurfaces(
-            va_format, requested_pic_size_, buffers.size(), &va_surface_ids),
-        "Failed creating VA Surfaces", PLATFORM_FAILURE, );
+    RETURN_AND_NOTIFY_ON_FAILURE(vaapi_wrapper_->CreateContextAndSurfaces(
+                                     va_format, requested_pic_size_,
+                                     requested_num_surfaces, &va_surface_ids),
+                                 "Failed creating VA Surfaces",
+                                 PLATFORM_FAILURE, );
   }
-  DCHECK_EQ(va_surface_ids.size(), buffers.size());
 
   available_va_surfaces_.assign(va_surface_ids.begin(), va_surface_ids.end());
@@ -995,9 +1031,12 @@ scoped_refptr<VASurface> VaapiVideoDecodeAccelerator::CreateSurface() {
   const VASurfaceID id = available_va_surfaces_.front();
   available_va_surfaces_.pop_front();
-  TRACE_COUNTER_ID2("media,gpu", "Vaapi VASurfaceIDs", this, "used",
-                    pictures_.size() - available_va_surfaces_.size(),
-                    "available", available_va_surfaces_.size());
+  TRACE_COUNTER_ID2(
+      "media,gpu", "Vaapi VASurfaceIDs", this, "used",
+      (use_reduced_number_of_allocations_ ? requested_num_reference_frames_
+                                          : pictures_.size()) -
+          available_va_surfaces_.size(),
+      "available", available_va_surfaces_.size());
 
   return new VASurface(id, requested_pic_size_,
                        vaapi_wrapper_->va_surface_format(),
@@ -1027,13 +1066,17 @@ scoped_refptr<VASurface> VaapiVideoDecodeAccelerator::CreateSurface() {
 void VaapiVideoDecodeAccelerator::RecycleVASurfaceID(
     VASurfaceID va_surface_id) {
   DCHECK(task_runner_->BelongsToCurrentThread());
 
   {
     base::AutoLock auto_lock(lock_);
     available_va_surfaces_.push_back(va_surface_id);
     if (!decode_using_client_picture_buffers_) {
-      TRACE_COUNTER_ID2("media,gpu", "Vaapi VASurfaceIDs", this, "used",
-                        pictures_.size() - available_va_surfaces_.size(),
-                        "available", available_va_surfaces_.size());
+      TRACE_COUNTER_ID2(
+          "media,gpu", "Vaapi VASurfaceIDs", this, "used",
+          (use_reduced_number_of_allocations_ ? requested_num_reference_frames_
+                                              : pictures_.size()) -
+              available_va_surfaces_.size(),
+          "available", available_va_surfaces_.size());
     }
     surfaces_available_.Signal();
   }
@@ -1045,7 +1088,7 @@ bool VaapiVideoDecodeAccelerator::OnMemoryDump(
     base::trace_event::ProcessMemoryDump* pmd) {
   using base::trace_event::MemoryAllocatorDump;
   base::AutoLock auto_lock(lock_);
-  if (decode_using_client_picture_buffers_ || pictures_.empty())
+  if (decode_using_client_picture_buffers_ || !requested_num_reference_frames_)
     return false;
 
   auto dump_name = base::StringPrintf("gpu/vaapi/decoder/0x%" PRIxPTR,
@@ -1060,16 +1103,20 @@ bool VaapiVideoDecodeAccelerator::OnMemoryDump(
   const float va_surface_bytes_per_pixel =
       va_surface_format == VA_RT_FORMAT_YUV420 ? kNumBytesPerPixelYUV420
                                                : kNumBytesPerPixelYUV420_10bpp;
-  // Report |pictures_.size()| and the associated memory size.
-  // The calculated size is an estimation since we don't know the internal VA
+  // Report |requested_num_surfaces| and the associated memory size. The
+  // calculated size is an estimation since we don't know the internal VA
   // strides, texture compression, headers, etc, but is a good lower boundary.
-  dump->AddScalar(
-      MemoryAllocatorDump::kNameSize, MemoryAllocatorDump::kUnitsBytes,
-      static_cast<uint64_t>(pictures_.size() * requested_pic_size_.GetArea() *
-                            va_surface_bytes_per_pixel));
+  const size_t requested_num_surfaces = use_reduced_number_of_allocations_
+                                            ? requested_num_reference_frames_
+                                            : pictures_.size();
+  dump->AddScalar(MemoryAllocatorDump::kNameSize,
+                  MemoryAllocatorDump::kUnitsBytes,
+                  static_cast<uint64_t>(requested_num_surfaces *
+                                        requested_pic_size_.GetArea() *
+                                        va_surface_bytes_per_pixel));
   dump->AddScalar(MemoryAllocatorDump::kNameObjectCount,
                   MemoryAllocatorDump::kUnitsObjects,
-                  static_cast<uint64_t>(pictures_.size()));
+                  static_cast<uint64_t>(requested_num_surfaces));
 
   return true;
 }
...
@@ -178,9 +178,12 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator
   // |available_va_surfaces_|
   void RecycleVASurfaceID(VASurfaceID va_surface_id);
 
-  // Initiate wait cycle for surfaces to be released before we release them
-  // and allocate new ones, as requested by the decoder.
-  void InitiateSurfaceSetChange(size_t num_pics, gfx::Size size);
+  // Request a new set of |num_pics| PictureBuffers to be allocated by
+  // |client_|. Up to |num_reference_frames| out of |num_pics_| might be needed
+  // by |decoder_|.
+  void InitiateSurfaceSetChange(size_t num_pics,
+                                gfx::Size size,
+                                size_t num_reference_frames);
 
   // Check if the surfaces have been released or post ourselves for later.
   void TryFinishSurfaceSetChange();
@@ -252,9 +255,13 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator
   // Only used on |task_runner_|.
   base::queue<base::OnceClosure> pending_output_cbs_;
 
+  // TODO(crbug.com/912295): Enable these two for IMPORT |output_mode_| as well.
   // Under some circumstances, we can pass to libva our own VASurfaceIDs to
-  // decode onto, which skips one copy. Only used on |task_runner_|.
+  // decode onto, which skips one copy. see https://crbug.com/822346.
   bool decode_using_client_picture_buffers_;
+  // When |decode_using_client_picture_buffers_| is false and under certain
+  // conditions, we can reduce the number of necessary allocated buffers.
+  bool use_reduced_number_of_allocations_;
 
   // WeakPtr<> pointing to |this| for use in posting tasks from the decoder
   // thread back to the ChildThread. Because the decoder thread is a member of
@@ -289,10 +296,14 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator
   // to be returned before we can free them. Only used on |task_runner_|.
   bool awaiting_va_surfaces_recycle_;
 
-  // Last requested number/resolution of output picture buffers and their
-  // format.
+  // Last requested number/resolution of output PictureBuffers.
   size_t requested_num_pics_;
   gfx::Size requested_pic_size_;
+  // Max number of reference frames needed by |decoder_|. Only used on
+  // |task_runner_| and when |use_reduced_number_of_allocations_| is true.
+  size_t requested_num_reference_frames_;
+  size_t previously_requested_num_reference_frames_;
 
   VideoCodecProfile profile_;
 
   // Callback to make GL context current.
...
@@ -40,7 +40,7 @@ struct TestParams {
 constexpr int32_t kBitstreamId = 123;
 constexpr size_t kInputSize = 256;
 
-constexpr size_t kNumPictures = 2;
+constexpr size_t kNumPictures = 4;
 const gfx::Size kPictureSize(64, 48);
 
 constexpr size_t kNewNumPictures = 3;
@@ -61,6 +61,7 @@ class MockAcceleratedVideoDecoder : public AcceleratedVideoDecoder {
   MOCK_METHOD0(Decode, DecodeResult());
   MOCK_CONST_METHOD0(GetPicSize, gfx::Size());
   MOCK_CONST_METHOD0(GetRequiredNumOfPictures, size_t());
+  MOCK_CONST_METHOD0(GetNumReferenceFrames, size_t());
 };
 
 class MockVaapiWrapper : public VaapiWrapper {
@@ -153,7 +154,8 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
     decoder_thread_.Start();
 
     // Don't want to go through a vda_->Initialize() because it binds too many
-    // items of the environment. Instead, just start the decoder thread.
+    // items of the environment. Instead, do all the necessary steps here.
     vda_.decoder_thread_task_runner_ = decoder_thread_.task_runner();
 
     // Plug in all the mocks and ourselves as the |client_|.
@@ -163,10 +165,15 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
     vda_.vpp_vaapi_wrapper_ = mock_vpp_vaapi_wrapper_;
     vda_.vaapi_picture_factory_.reset(mock_vaapi_picture_factory_);
 
+    // TODO(crbug.com/917999): add IMPORT mode to test variations.
     vda_.output_mode_ = VideoDecodeAccelerator::Config::OutputMode::ALLOCATE;
     vda_.decode_using_client_picture_buffers_ =
         GetParam().decode_using_client_picture_buffers;
+    vda_.use_reduced_number_of_allocations_ =
+        !vda_.decode_using_client_picture_buffers_ &&
+        vda_.output_mode_ ==
+            VideoDecodeAccelerator::Config::OutputMode::ALLOCATE;
 
     vda_.state_ = VaapiVideoDecodeAccelerator::kIdle;
   }
@@ -221,6 +228,9 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
     EXPECT_CALL(*mock_decoder_, GetRequiredNumOfPictures())
         .WillOnce(Return(num_pictures));
     EXPECT_CALL(*mock_decoder_, GetPicSize()).WillOnce(Return(picture_size));
+    const size_t kNumReferenceFrames = num_pictures / 2;
+    EXPECT_CALL(*mock_decoder_, GetNumReferenceFrames())
+        .WillOnce(Return(kNumReferenceFrames));
     EXPECT_CALL(*mock_vaapi_wrapper_, DestroyContextAndSurfaces());
 
     if (expect_dismiss_picture_buffers) {
@@ -228,8 +238,14 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
           .Times(num_picture_buffers_to_dismiss);
     }
 
+    const size_t expected_num_picture_buffers_requested =
+        vda_.use_reduced_number_of_allocations_
+            ? num_pictures - kNumReferenceFrames
+            : num_pictures;
+
     EXPECT_CALL(*this,
-                ProvidePictureBuffers(num_pictures, _, 1, picture_size, _))
+                ProvidePictureBuffers(expected_num_picture_buffers_requested, _,
+                                      1, picture_size, _))
         .WillOnce(RunClosure(quit_closure));
 
     base::SharedMemoryHandle handle;
@@ -263,17 +279,18 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
           MockCreateVaapiPicture(mock_vaapi_wrapper_.get(), picture_size))
           .Times(num_pictures);
     } else {
-      EXPECT_CALL(*mock_vaapi_wrapper_,
-                  CreateContextAndSurfaces(_, picture_size, num_pictures, _))
+      const size_t kNumReferenceFrames = num_pictures / 2;
+      EXPECT_CALL(
+          *mock_vaapi_wrapper_,
+          CreateContextAndSurfaces(_, picture_size, kNumReferenceFrames, _))
           .WillOnce(DoAll(
-              WithArg<3>(Invoke(
-                  [num_pictures](std::vector<VASurfaceID>* va_surface_ids) {
-                    va_surface_ids->resize(num_pictures);
+              WithArg<3>(Invoke([kNumReferenceFrames](
+                                    std::vector<VASurfaceID>* va_surface_ids) {
+                va_surface_ids->resize(kNumReferenceFrames);
               })),
               Return(true)));
-      EXPECT_CALL(
-          *mock_vaapi_picture_factory_,
-          MockCreateVaapiPicture(mock_vpp_vaapi_wrapper_.get(), picture_size))
+      EXPECT_CALL(*mock_vaapi_picture_factory_,
+                  MockCreateVaapiPicture(_, picture_size))
           .Times(num_pictures);
     }
...
@@ -7,6 +7,10 @@
 
 namespace media {
 
+namespace {
+constexpr size_t kVP8NumFramesActive = 4;
+};
+
 VP8Decoder::VP8Accelerator::VP8Accelerator() {}
 
 VP8Decoder::VP8Accelerator::~VP8Accelerator() {}
@@ -165,9 +169,14 @@ gfx::Size VP8Decoder::GetPicSize() const {
 }
 
 size_t VP8Decoder::GetRequiredNumOfPictures() const {
-  const size_t kVP8NumFramesActive = 4;
-  const size_t kPicsInPipeline = limits::kMaxVideoFrames + 2;
+  constexpr size_t kPicsInPipeline = limits::kMaxVideoFrames + 1;
   return kVP8NumFramesActive + kPicsInPipeline;
 }
 
+size_t VP8Decoder::GetNumReferenceFrames() const {
+  // Maximum number of reference frames needed plus one for the one being
+  // currently egressed.
+  return kVP8NumFramesActive + 1;
+}
+
 }  // namespace media
@@ -72,6 +72,7 @@ class MEDIA_GPU_EXPORT VP8Decoder : public AcceleratedVideoDecoder {
   DecodeResult Decode() override WARN_UNUSED_RESULT;
   gfx::Size GetPicSize() const override;
   size_t GetRequiredNumOfPictures() const override;
+  size_t GetNumReferenceFrames() const override;
 
  private:
   bool DecodeAndOutputCurrentFrame(scoped_refptr<VP8Picture> pic);
...
@@ -261,9 +261,14 @@ gfx::Size VP9Decoder::GetPicSize() const {
 }
 
 size_t VP9Decoder::GetRequiredNumOfPictures() const {
-  // kMaxVideoFrames to keep higher level media pipeline populated, +2 for the
-  // pictures being parsed and decoded currently.
-  return limits::kMaxVideoFrames + kVp9NumRefFrames + 2;
+  constexpr size_t kPicsInPipeline = limits::kMaxVideoFrames + 1;
+  return kPicsInPipeline + GetNumReferenceFrames();
+}
+
+size_t VP9Decoder::GetNumReferenceFrames() const {
+  // Maximum number of reference frames needed plus one for the one being
+  // currently egressed.
+  return kVp9NumRefFrames + 1;
 }
 
 }  // namespace media
@@ -106,6 +106,7 @@ class MEDIA_GPU_EXPORT VP9Decoder : public AcceleratedVideoDecoder {
   DecodeResult Decode() override WARN_UNUSED_RESULT;
   gfx::Size GetPicSize() const override;
   size_t GetRequiredNumOfPictures() const override;
+  size_t GetNumReferenceFrames() const override;
 
  private:
   // Update ref_frames_ based on the information in current frame header.
...