Commit 477a706a authored by Miguel Casas's avatar Miguel Casas Committed by Commit Bot

RELAND2: Vaapi decode: split |decoder_|s GetRequiredNumOfPictures()

This CL is a smart-rebase-and-fix of the CLs below: it introduces
a new parameter |use_reduced_number_of_allocations_| to allow for the
new working mode described below and to temporarily circumvent the
GtsExoPlayerTestCases failures (b/121169667 and b/121003733); this
new flag is false when |output_mode_| is IMPORT, so all ARC++ cases
should work bc left untouched.

OTOH a mem savings overview can be found at https://goo.gl/3PaMiA.

Original CL description -----------------------------------------------
Vaapi decode: split |decoder_|s GetRequiredNumOfPictures()

This CL reduces the amount of PictureBuffers requested to be allocated
by the |client_| when we are not |decode_using_client_picture_buffers_|.
Instead, it "splits" the requested allocations into
- the actual needed PictureBuffers (A)
- the codec's requested reference frames (B) (a new method
 GetNumReferenceFrames() is added to AcceleratedVideoDecoder for this).

This splitting saves a lot of memory, since we allocate A+B buffers
instead of 2*(A+B). (B is 5 and A is 4-VP8, 4-12 H264/VP9)

Test: crosvideo changing resolutions for each codec, v_d_a_unittest
on nocturne (KBL) and caroline (SKL).

Bug: 912295

Reviewed-on: https://chromium-review.googlesource.com/c/1363807Reviewed-by: default avatarHirokazu Honda <hiroh@chromium.org>
Commit-Queue: Miguel Casas <mcasas@chromium.org>
Cr-Original-Original-Commit-Position: refs/heads/master@{#615571}
Reviewed-on: https://chromium-review.googlesource.com/c/1379274
Cr-Original-Commit-Position: refs/heads/master@{#617366}
Change-Id: Ibf9a1455f8df4d52b77aee8e01f15c02878947ae
Reviewed-on: https://chromium-review.googlesource.com/c/1387391
Cr-Commit-Position: refs/heads/master@{#620025}
parent 5e0d370c
...@@ -66,11 +66,13 @@ class MEDIA_GPU_EXPORT AcceleratedVideoDecoder { ...@@ -66,11 +66,13 @@ class MEDIA_GPU_EXPORT AcceleratedVideoDecoder {
// we need a new set of them, or when an error occurs. // we need a new set of them, or when an error occurs.
virtual DecodeResult Decode() WARN_UNUSED_RESULT = 0; virtual DecodeResult Decode() WARN_UNUSED_RESULT = 0;
// Return dimensions/required number of output surfaces that client should // Return dimensions/required number of pictures that client should be ready
// be ready to provide for the decoder to function properly. // to provide for the decoder to function properly (of which up to
// To be used after Decode() returns kAllocateNewSurfaces. // GetNumReferenceFrames() might be needed for internal decoding). To be used
// after Decode() returns kAllocateNewSurfaces.
virtual gfx::Size GetPicSize() const = 0; virtual gfx::Size GetPicSize() const = 0;
virtual size_t GetRequiredNumOfPictures() const = 0; virtual size_t GetRequiredNumOfPictures() const = 0;
virtual size_t GetNumReferenceFrames() const = 0;
// About 3 secs for 30 fps video. When the new sized keyframe is missed, the // About 3 secs for 30 fps video. When the new sized keyframe is missed, the
// decoder cannot decode the frame. The number of frames are skipped until // decoder cannot decode the frame. The number of frames are skipped until
......
...@@ -1444,7 +1444,17 @@ gfx::Size H264Decoder::GetPicSize() const { ...@@ -1444,7 +1444,17 @@ gfx::Size H264Decoder::GetPicSize() const {
} }
size_t H264Decoder::GetRequiredNumOfPictures() const { size_t H264Decoder::GetRequiredNumOfPictures() const {
return dpb_.max_num_pics() + kPicsInPipeline; constexpr size_t kPicsInPipeline = limits::kMaxVideoFrames + 1;
return GetNumReferenceFrames() + kPicsInPipeline;
}
size_t H264Decoder::GetNumReferenceFrames() const {
// Use the maximum number of pictures in the Decoded Picture Buffer plus one
// for the one being currently egressed.
// Another +1 is experimentally needed for high-to-high resolution changes.
// TODO(mcasas): Figure out why +2 instead of +1, see crbug.com/909926 and
// http://crrev.com/c/1363807/9/media/gpu/h264_decoder.cc#1449.
return dpb_.max_num_pics() + 2;
} }
// static // static
......
...@@ -168,6 +168,7 @@ class MEDIA_GPU_EXPORT H264Decoder : public AcceleratedVideoDecoder { ...@@ -168,6 +168,7 @@ class MEDIA_GPU_EXPORT H264Decoder : public AcceleratedVideoDecoder {
DecodeResult Decode() override WARN_UNUSED_RESULT; DecodeResult Decode() override WARN_UNUSED_RESULT;
gfx::Size GetPicSize() const override; gfx::Size GetPicSize() const override;
size_t GetRequiredNumOfPictures() const override; size_t GetRequiredNumOfPictures() const override;
size_t GetNumReferenceFrames() const override;
// Return true if we need to start a new picture. // Return true if we need to start a new picture.
static bool IsNewPrimaryCodedPicture(const H264Picture* curr_pic, static bool IsNewPrimaryCodedPicture(const H264Picture* curr_pic,
...@@ -182,17 +183,6 @@ class MEDIA_GPU_EXPORT H264Decoder : public AcceleratedVideoDecoder { ...@@ -182,17 +183,6 @@ class MEDIA_GPU_EXPORT H264Decoder : public AcceleratedVideoDecoder {
H264Picture* pic); H264Picture* pic);
private: private:
// We need to keep at most kDPBMaxSize pictures in DPB for
// reference/to display later and an additional one for the one currently
// being decoded. We also ask for some additional ones since VDA needs
// to accumulate a few ready-to-output pictures before it actually starts
// displaying and giving them back. +2 instead of +1 because of subjective
// smoothness improvement during testing.
enum {
kPicsInPipeline = limits::kMaxVideoFrames + 2,
kMaxNumReqPictures = H264DPB::kDPBMaxSize + kPicsInPipeline,
};
// Internal state of the decoder. // Internal state of the decoder.
enum State { enum State {
// After initialization, need an SPS. // After initialization, need an SPS.
......
...@@ -178,9 +178,12 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator ...@@ -178,9 +178,12 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator
// |available_va_surfaces_| // |available_va_surfaces_|
void RecycleVASurfaceID(VASurfaceID va_surface_id); void RecycleVASurfaceID(VASurfaceID va_surface_id);
// Initiate wait cycle for surfaces to be released before we release them // Request a new set of |num_pics| PictureBuffers to be allocated by
// and allocate new ones, as requested by the decoder. // |client_|. Up to |num_reference_frames| out of |num_pics_| might be needed
void InitiateSurfaceSetChange(size_t num_pics, gfx::Size size); // by |decoder_|.
void InitiateSurfaceSetChange(size_t num_pics,
gfx::Size size,
size_t num_reference_frames);
// Check if the surfaces have been released or post ourselves for later. // Check if the surfaces have been released or post ourselves for later.
void TryFinishSurfaceSetChange(); void TryFinishSurfaceSetChange();
...@@ -252,9 +255,13 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator ...@@ -252,9 +255,13 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator
// Only used on |task_runner_|. // Only used on |task_runner_|.
base::queue<base::OnceClosure> pending_output_cbs_; base::queue<base::OnceClosure> pending_output_cbs_;
// TODO(crbug.com/912295): Enable these two for IMPORT |output_mode_| as well.
// Under some circumstances, we can pass to libva our own VASurfaceIDs to // Under some circumstances, we can pass to libva our own VASurfaceIDs to
// decode onto, which skips one copy. Only used on |task_runner_|. // decode onto, which skips one copy. see https://crbug.com/822346.
bool decode_using_client_picture_buffers_; bool decode_using_client_picture_buffers_;
// When |decode_using_client_picture_buffers_| is false and under certain
// conditions, we can reduce the number of necessary allocated buffers.
bool use_reduced_number_of_allocations_;
// WeakPtr<> pointing to |this| for use in posting tasks from the decoder // WeakPtr<> pointing to |this| for use in posting tasks from the decoder
// thread back to the ChildThread. Because the decoder thread is a member of // thread back to the ChildThread. Because the decoder thread is a member of
...@@ -289,10 +296,14 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator ...@@ -289,10 +296,14 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator
// to be returned before we can free them. Only used on |task_runner_|. // to be returned before we can free them. Only used on |task_runner_|.
bool awaiting_va_surfaces_recycle_; bool awaiting_va_surfaces_recycle_;
// Last requested number/resolution of output picture buffers and their // Last requested number/resolution of output PictureBuffers.
// format.
size_t requested_num_pics_; size_t requested_num_pics_;
gfx::Size requested_pic_size_; gfx::Size requested_pic_size_;
// Max number of reference frames needed by |decoder_|. Only used on
// |task_runner_| and when |use_reduced_number_of_allocations_| is true.
size_t requested_num_reference_frames_;
size_t previously_requested_num_reference_frames_;
VideoCodecProfile profile_; VideoCodecProfile profile_;
// Callback to make GL context current. // Callback to make GL context current.
......
...@@ -40,7 +40,7 @@ struct TestParams { ...@@ -40,7 +40,7 @@ struct TestParams {
constexpr int32_t kBitstreamId = 123; constexpr int32_t kBitstreamId = 123;
constexpr size_t kInputSize = 256; constexpr size_t kInputSize = 256;
constexpr size_t kNumPictures = 2; constexpr size_t kNumPictures = 4;
const gfx::Size kPictureSize(64, 48); const gfx::Size kPictureSize(64, 48);
constexpr size_t kNewNumPictures = 3; constexpr size_t kNewNumPictures = 3;
...@@ -61,6 +61,7 @@ class MockAcceleratedVideoDecoder : public AcceleratedVideoDecoder { ...@@ -61,6 +61,7 @@ class MockAcceleratedVideoDecoder : public AcceleratedVideoDecoder {
MOCK_METHOD0(Decode, DecodeResult()); MOCK_METHOD0(Decode, DecodeResult());
MOCK_CONST_METHOD0(GetPicSize, gfx::Size()); MOCK_CONST_METHOD0(GetPicSize, gfx::Size());
MOCK_CONST_METHOD0(GetRequiredNumOfPictures, size_t()); MOCK_CONST_METHOD0(GetRequiredNumOfPictures, size_t());
MOCK_CONST_METHOD0(GetNumReferenceFrames, size_t());
}; };
class MockVaapiWrapper : public VaapiWrapper { class MockVaapiWrapper : public VaapiWrapper {
...@@ -153,7 +154,8 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>, ...@@ -153,7 +154,8 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
decoder_thread_.Start(); decoder_thread_.Start();
// Don't want to go through a vda_->Initialize() because it binds too many // Don't want to go through a vda_->Initialize() because it binds too many
// items of the environment. Instead, just start the decoder thread. // items of the environment. Instead, do all the necessary steps here.
vda_.decoder_thread_task_runner_ = decoder_thread_.task_runner(); vda_.decoder_thread_task_runner_ = decoder_thread_.task_runner();
// Plug in all the mocks and ourselves as the |client_|. // Plug in all the mocks and ourselves as the |client_|.
...@@ -163,10 +165,15 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>, ...@@ -163,10 +165,15 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
vda_.vpp_vaapi_wrapper_ = mock_vpp_vaapi_wrapper_; vda_.vpp_vaapi_wrapper_ = mock_vpp_vaapi_wrapper_;
vda_.vaapi_picture_factory_.reset(mock_vaapi_picture_factory_); vda_.vaapi_picture_factory_.reset(mock_vaapi_picture_factory_);
// TODO(crbug.com/917999): add IMPORT mode to test variations.
vda_.output_mode_ = VideoDecodeAccelerator::Config::OutputMode::ALLOCATE; vda_.output_mode_ = VideoDecodeAccelerator::Config::OutputMode::ALLOCATE;
vda_.decode_using_client_picture_buffers_ = vda_.decode_using_client_picture_buffers_ =
GetParam().decode_using_client_picture_buffers; GetParam().decode_using_client_picture_buffers;
vda_.use_reduced_number_of_allocations_ =
!vda_.decode_using_client_picture_buffers_ &&
vda_.output_mode_ ==
VideoDecodeAccelerator::Config::OutputMode::ALLOCATE;
vda_.state_ = VaapiVideoDecodeAccelerator::kIdle; vda_.state_ = VaapiVideoDecodeAccelerator::kIdle;
} }
...@@ -221,6 +228,9 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>, ...@@ -221,6 +228,9 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
EXPECT_CALL(*mock_decoder_, GetRequiredNumOfPictures()) EXPECT_CALL(*mock_decoder_, GetRequiredNumOfPictures())
.WillOnce(Return(num_pictures)); .WillOnce(Return(num_pictures));
EXPECT_CALL(*mock_decoder_, GetPicSize()).WillOnce(Return(picture_size)); EXPECT_CALL(*mock_decoder_, GetPicSize()).WillOnce(Return(picture_size));
const size_t kNumReferenceFrames = num_pictures / 2;
EXPECT_CALL(*mock_decoder_, GetNumReferenceFrames())
.WillOnce(Return(kNumReferenceFrames));
EXPECT_CALL(*mock_vaapi_wrapper_, DestroyContextAndSurfaces()); EXPECT_CALL(*mock_vaapi_wrapper_, DestroyContextAndSurfaces());
if (expect_dismiss_picture_buffers) { if (expect_dismiss_picture_buffers) {
...@@ -228,8 +238,14 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>, ...@@ -228,8 +238,14 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
.Times(num_picture_buffers_to_dismiss); .Times(num_picture_buffers_to_dismiss);
} }
const size_t expected_num_picture_buffers_requested =
vda_.use_reduced_number_of_allocations_
? num_pictures - kNumReferenceFrames
: num_pictures;
EXPECT_CALL(*this, EXPECT_CALL(*this,
ProvidePictureBuffers(num_pictures, _, 1, picture_size, _)) ProvidePictureBuffers(expected_num_picture_buffers_requested, _,
1, picture_size, _))
.WillOnce(RunClosure(quit_closure)); .WillOnce(RunClosure(quit_closure));
base::SharedMemoryHandle handle; base::SharedMemoryHandle handle;
...@@ -263,17 +279,18 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>, ...@@ -263,17 +279,18 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
MockCreateVaapiPicture(mock_vaapi_wrapper_.get(), picture_size)) MockCreateVaapiPicture(mock_vaapi_wrapper_.get(), picture_size))
.Times(num_pictures); .Times(num_pictures);
} else { } else {
EXPECT_CALL(*mock_vaapi_wrapper_, const size_t kNumReferenceFrames = num_pictures / 2;
CreateContextAndSurfaces(_, picture_size, num_pictures, _)) EXPECT_CALL(
*mock_vaapi_wrapper_,
CreateContextAndSurfaces(_, picture_size, kNumReferenceFrames, _))
.WillOnce(DoAll( .WillOnce(DoAll(
WithArg<3>(Invoke( WithArg<3>(Invoke([kNumReferenceFrames](
[num_pictures](std::vector<VASurfaceID>* va_surface_ids) { std::vector<VASurfaceID>* va_surface_ids) {
va_surface_ids->resize(num_pictures); va_surface_ids->resize(kNumReferenceFrames);
})), })),
Return(true))); Return(true)));
EXPECT_CALL( EXPECT_CALL(*mock_vaapi_picture_factory_,
*mock_vaapi_picture_factory_, MockCreateVaapiPicture(_, picture_size))
MockCreateVaapiPicture(mock_vpp_vaapi_wrapper_.get(), picture_size))
.Times(num_pictures); .Times(num_pictures);
} }
......
...@@ -7,6 +7,10 @@ ...@@ -7,6 +7,10 @@
namespace media { namespace media {
namespace {
constexpr size_t kVP8NumFramesActive = 4;
};
VP8Decoder::VP8Accelerator::VP8Accelerator() {} VP8Decoder::VP8Accelerator::VP8Accelerator() {}
VP8Decoder::VP8Accelerator::~VP8Accelerator() {} VP8Decoder::VP8Accelerator::~VP8Accelerator() {}
...@@ -165,9 +169,14 @@ gfx::Size VP8Decoder::GetPicSize() const { ...@@ -165,9 +169,14 @@ gfx::Size VP8Decoder::GetPicSize() const {
} }
size_t VP8Decoder::GetRequiredNumOfPictures() const { size_t VP8Decoder::GetRequiredNumOfPictures() const {
const size_t kVP8NumFramesActive = 4; constexpr size_t kPicsInPipeline = limits::kMaxVideoFrames + 1;
const size_t kPicsInPipeline = limits::kMaxVideoFrames + 2;
return kVP8NumFramesActive + kPicsInPipeline; return kVP8NumFramesActive + kPicsInPipeline;
} }
size_t VP8Decoder::GetNumReferenceFrames() const {
// Maximum number of reference frames needed plus one for the one being
// currently egressed.
return kVP8NumFramesActive + 1;
}
} // namespace media } // namespace media
...@@ -72,6 +72,7 @@ class MEDIA_GPU_EXPORT VP8Decoder : public AcceleratedVideoDecoder { ...@@ -72,6 +72,7 @@ class MEDIA_GPU_EXPORT VP8Decoder : public AcceleratedVideoDecoder {
DecodeResult Decode() override WARN_UNUSED_RESULT; DecodeResult Decode() override WARN_UNUSED_RESULT;
gfx::Size GetPicSize() const override; gfx::Size GetPicSize() const override;
size_t GetRequiredNumOfPictures() const override; size_t GetRequiredNumOfPictures() const override;
size_t GetNumReferenceFrames() const override;
private: private:
bool DecodeAndOutputCurrentFrame(scoped_refptr<VP8Picture> pic); bool DecodeAndOutputCurrentFrame(scoped_refptr<VP8Picture> pic);
......
...@@ -261,9 +261,14 @@ gfx::Size VP9Decoder::GetPicSize() const { ...@@ -261,9 +261,14 @@ gfx::Size VP9Decoder::GetPicSize() const {
} }
size_t VP9Decoder::GetRequiredNumOfPictures() const { size_t VP9Decoder::GetRequiredNumOfPictures() const {
// kMaxVideoFrames to keep higher level media pipeline populated, +2 for the constexpr size_t kPicsInPipeline = limits::kMaxVideoFrames + 1;
// pictures being parsed and decoded currently. return kPicsInPipeline + GetNumReferenceFrames();
return limits::kMaxVideoFrames + kVp9NumRefFrames + 2; }
size_t VP9Decoder::GetNumReferenceFrames() const {
// Maximum number of reference frames needed plus one for the one being
// currently egressed.
return kVp9NumRefFrames + 1;
} }
} // namespace media } // namespace media
...@@ -106,6 +106,7 @@ class MEDIA_GPU_EXPORT VP9Decoder : public AcceleratedVideoDecoder { ...@@ -106,6 +106,7 @@ class MEDIA_GPU_EXPORT VP9Decoder : public AcceleratedVideoDecoder {
DecodeResult Decode() override WARN_UNUSED_RESULT; DecodeResult Decode() override WARN_UNUSED_RESULT;
gfx::Size GetPicSize() const override; gfx::Size GetPicSize() const override;
size_t GetRequiredNumOfPictures() const override; size_t GetRequiredNumOfPictures() const override;
size_t GetNumReferenceFrames() const override;
private: private:
// Update ref_frames_ based on the information in current frame header. // Update ref_frames_ based on the information in current frame header.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment