Commit 477a706a authored by Miguel Casas's avatar Miguel Casas Committed by Commit Bot

RELAND2: Vaapi decode: split |decoder_|s GetRequiredNumOfPictures()

This CL is a smart-rebase-and-fix of the CLs below: it introduces
a new parameter |use_reduced_number_of_allocations_| to allow for the
new working mode described below and to temporarily circumvent the
GtsExoPlayerTestCases failures (b/121169667 and b/121003733); this
new flag is false when |output_mode_| is IMPORT, so all ARC++ cases
should work bc left untouched.

OTOH a mem savings overview can be found at https://goo.gl/3PaMiA.

Original CL description -----------------------------------------------
Vaapi decode: split |decoder_|s GetRequiredNumOfPictures()

This CL reduces the amount of PictureBuffers requested to be allocated
by the |client_| when we are not |decode_using_client_picture_buffers_|.
Instead, it "splits" the requested allocations into
- the actual needed PictureBuffers (A)
- the codec's requested reference frames (B) (a new method
 GetNumReferenceFrames() is added to AcceleratedVideoDecoder for this).

This splitting saves a lot of memory, since we allocate A+B buffers
instead of 2*(A+B). (B is 5 and A is 4-VP8, 4-12 H264/VP9)

Test: crosvideo changing resolutions for each codec, v_d_a_unittest
on nocturne (KBL) and caroline (SKL).

Bug: 912295

Reviewed-on: https://chromium-review.googlesource.com/c/1363807Reviewed-by: default avatarHirokazu Honda <hiroh@chromium.org>
Commit-Queue: Miguel Casas <mcasas@chromium.org>
Cr-Original-Original-Commit-Position: refs/heads/master@{#615571}
Reviewed-on: https://chromium-review.googlesource.com/c/1379274
Cr-Original-Commit-Position: refs/heads/master@{#617366}
Change-Id: Ibf9a1455f8df4d52b77aee8e01f15c02878947ae
Reviewed-on: https://chromium-review.googlesource.com/c/1387391
Cr-Commit-Position: refs/heads/master@{#620025}
parent 5e0d370c
......@@ -66,11 +66,13 @@ class MEDIA_GPU_EXPORT AcceleratedVideoDecoder {
// we need a new set of them, or when an error occurs.
virtual DecodeResult Decode() WARN_UNUSED_RESULT = 0;
// Return dimensions/required number of output surfaces that client should
// be ready to provide for the decoder to function properly.
// To be used after Decode() returns kAllocateNewSurfaces.
// Return dimensions/required number of pictures that client should be ready
// to provide for the decoder to function properly (of which up to
// GetNumReferenceFrames() might be needed for internal decoding). To be used
// after Decode() returns kAllocateNewSurfaces.
virtual gfx::Size GetPicSize() const = 0;
virtual size_t GetRequiredNumOfPictures() const = 0;
virtual size_t GetNumReferenceFrames() const = 0;
// About 3 secs for 30 fps video. When the new sized keyframe is missed, the
// decoder cannot decode the frame. The number of frames are skipped until
......
......@@ -1444,7 +1444,17 @@ gfx::Size H264Decoder::GetPicSize() const {
}
size_t H264Decoder::GetRequiredNumOfPictures() const {
return dpb_.max_num_pics() + kPicsInPipeline;
constexpr size_t kPicsInPipeline = limits::kMaxVideoFrames + 1;
return GetNumReferenceFrames() + kPicsInPipeline;
}
size_t H264Decoder::GetNumReferenceFrames() const {
// Use the maximum number of pictures in the Decoded Picture Buffer plus one
// for the one being currently egressed.
// Another +1 is experimentally needed for high-to-high resolution changes.
// TODO(mcasas): Figure out why +2 instead of +1, see crbug.com/909926 and
// http://crrev.com/c/1363807/9/media/gpu/h264_decoder.cc#1449.
return dpb_.max_num_pics() + 2;
}
// static
......
......@@ -168,6 +168,7 @@ class MEDIA_GPU_EXPORT H264Decoder : public AcceleratedVideoDecoder {
DecodeResult Decode() override WARN_UNUSED_RESULT;
gfx::Size GetPicSize() const override;
size_t GetRequiredNumOfPictures() const override;
size_t GetNumReferenceFrames() const override;
// Return true if we need to start a new picture.
static bool IsNewPrimaryCodedPicture(const H264Picture* curr_pic,
......@@ -182,17 +183,6 @@ class MEDIA_GPU_EXPORT H264Decoder : public AcceleratedVideoDecoder {
H264Picture* pic);
private:
// We need to keep at most kDPBMaxSize pictures in DPB for
// reference/to display later and an additional one for the one currently
// being decoded. We also ask for some additional ones since VDA needs
// to accumulate a few ready-to-output pictures before it actually starts
// displaying and giving them back. +2 instead of +1 because of subjective
// smoothness improvement during testing.
enum {
kPicsInPipeline = limits::kMaxVideoFrames + 2,
kMaxNumReqPictures = H264DPB::kDPBMaxSize + kPicsInPipeline,
};
// Internal state of the decoder.
enum State {
// After initialization, need an SPS.
......
......@@ -178,9 +178,12 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator
// |available_va_surfaces_|
void RecycleVASurfaceID(VASurfaceID va_surface_id);
// Initiate wait cycle for surfaces to be released before we release them
// and allocate new ones, as requested by the decoder.
void InitiateSurfaceSetChange(size_t num_pics, gfx::Size size);
// Request a new set of |num_pics| PictureBuffers to be allocated by
// |client_|. Up to |num_reference_frames| out of |num_pics_| might be needed
// by |decoder_|.
void InitiateSurfaceSetChange(size_t num_pics,
gfx::Size size,
size_t num_reference_frames);
// Check if the surfaces have been released or post ourselves for later.
void TryFinishSurfaceSetChange();
......@@ -252,9 +255,13 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator
// Only used on |task_runner_|.
base::queue<base::OnceClosure> pending_output_cbs_;
// TODO(crbug.com/912295): Enable these two for IMPORT |output_mode_| as well.
// Under some circumstances, we can pass to libva our own VASurfaceIDs to
// decode onto, which skips one copy. Only used on |task_runner_|.
// decode onto, which skips one copy. see https://crbug.com/822346.
bool decode_using_client_picture_buffers_;
// When |decode_using_client_picture_buffers_| is false and under certain
// conditions, we can reduce the number of necessary allocated buffers.
bool use_reduced_number_of_allocations_;
// WeakPtr<> pointing to |this| for use in posting tasks from the decoder
// thread back to the ChildThread. Because the decoder thread is a member of
......@@ -289,10 +296,14 @@ class MEDIA_GPU_EXPORT VaapiVideoDecodeAccelerator
// to be returned before we can free them. Only used on |task_runner_|.
bool awaiting_va_surfaces_recycle_;
// Last requested number/resolution of output picture buffers and their
// format.
// Last requested number/resolution of output PictureBuffers.
size_t requested_num_pics_;
gfx::Size requested_pic_size_;
// Max number of reference frames needed by |decoder_|. Only used on
// |task_runner_| and when |use_reduced_number_of_allocations_| is true.
size_t requested_num_reference_frames_;
size_t previously_requested_num_reference_frames_;
VideoCodecProfile profile_;
// Callback to make GL context current.
......
......@@ -40,7 +40,7 @@ struct TestParams {
constexpr int32_t kBitstreamId = 123;
constexpr size_t kInputSize = 256;
constexpr size_t kNumPictures = 2;
constexpr size_t kNumPictures = 4;
const gfx::Size kPictureSize(64, 48);
constexpr size_t kNewNumPictures = 3;
......@@ -61,6 +61,7 @@ class MockAcceleratedVideoDecoder : public AcceleratedVideoDecoder {
MOCK_METHOD0(Decode, DecodeResult());
MOCK_CONST_METHOD0(GetPicSize, gfx::Size());
MOCK_CONST_METHOD0(GetRequiredNumOfPictures, size_t());
MOCK_CONST_METHOD0(GetNumReferenceFrames, size_t());
};
class MockVaapiWrapper : public VaapiWrapper {
......@@ -153,7 +154,8 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
decoder_thread_.Start();
// Don't want to go through a vda_->Initialize() because it binds too many
// items of the environment. Instead, just start the decoder thread.
// items of the environment. Instead, do all the necessary steps here.
vda_.decoder_thread_task_runner_ = decoder_thread_.task_runner();
// Plug in all the mocks and ourselves as the |client_|.
......@@ -163,10 +165,15 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
vda_.vpp_vaapi_wrapper_ = mock_vpp_vaapi_wrapper_;
vda_.vaapi_picture_factory_.reset(mock_vaapi_picture_factory_);
// TODO(crbug.com/917999): add IMPORT mode to test variations.
vda_.output_mode_ = VideoDecodeAccelerator::Config::OutputMode::ALLOCATE;
vda_.decode_using_client_picture_buffers_ =
GetParam().decode_using_client_picture_buffers;
vda_.use_reduced_number_of_allocations_ =
!vda_.decode_using_client_picture_buffers_ &&
vda_.output_mode_ ==
VideoDecodeAccelerator::Config::OutputMode::ALLOCATE;
vda_.state_ = VaapiVideoDecodeAccelerator::kIdle;
}
......@@ -221,6 +228,9 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
EXPECT_CALL(*mock_decoder_, GetRequiredNumOfPictures())
.WillOnce(Return(num_pictures));
EXPECT_CALL(*mock_decoder_, GetPicSize()).WillOnce(Return(picture_size));
const size_t kNumReferenceFrames = num_pictures / 2;
EXPECT_CALL(*mock_decoder_, GetNumReferenceFrames())
.WillOnce(Return(kNumReferenceFrames));
EXPECT_CALL(*mock_vaapi_wrapper_, DestroyContextAndSurfaces());
if (expect_dismiss_picture_buffers) {
......@@ -228,8 +238,14 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
.Times(num_picture_buffers_to_dismiss);
}
const size_t expected_num_picture_buffers_requested =
vda_.use_reduced_number_of_allocations_
? num_pictures - kNumReferenceFrames
: num_pictures;
EXPECT_CALL(*this,
ProvidePictureBuffers(num_pictures, _, 1, picture_size, _))
ProvidePictureBuffers(expected_num_picture_buffers_requested, _,
1, picture_size, _))
.WillOnce(RunClosure(quit_closure));
base::SharedMemoryHandle handle;
......@@ -263,17 +279,18 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
MockCreateVaapiPicture(mock_vaapi_wrapper_.get(), picture_size))
.Times(num_pictures);
} else {
EXPECT_CALL(*mock_vaapi_wrapper_,
CreateContextAndSurfaces(_, picture_size, num_pictures, _))
const size_t kNumReferenceFrames = num_pictures / 2;
EXPECT_CALL(
*mock_vaapi_wrapper_,
CreateContextAndSurfaces(_, picture_size, kNumReferenceFrames, _))
.WillOnce(DoAll(
WithArg<3>(Invoke(
[num_pictures](std::vector<VASurfaceID>* va_surface_ids) {
va_surface_ids->resize(num_pictures);
WithArg<3>(Invoke([kNumReferenceFrames](
std::vector<VASurfaceID>* va_surface_ids) {
va_surface_ids->resize(kNumReferenceFrames);
})),
Return(true)));
EXPECT_CALL(
*mock_vaapi_picture_factory_,
MockCreateVaapiPicture(mock_vpp_vaapi_wrapper_.get(), picture_size))
EXPECT_CALL(*mock_vaapi_picture_factory_,
MockCreateVaapiPicture(_, picture_size))
.Times(num_pictures);
}
......
......@@ -7,6 +7,10 @@
namespace media {
namespace {
constexpr size_t kVP8NumFramesActive = 4;
};
VP8Decoder::VP8Accelerator::VP8Accelerator() {}
VP8Decoder::VP8Accelerator::~VP8Accelerator() {}
......@@ -165,9 +169,14 @@ gfx::Size VP8Decoder::GetPicSize() const {
}
size_t VP8Decoder::GetRequiredNumOfPictures() const {
const size_t kVP8NumFramesActive = 4;
const size_t kPicsInPipeline = limits::kMaxVideoFrames + 2;
constexpr size_t kPicsInPipeline = limits::kMaxVideoFrames + 1;
return kVP8NumFramesActive + kPicsInPipeline;
}
size_t VP8Decoder::GetNumReferenceFrames() const {
// Maximum number of reference frames needed plus one for the one being
// currently egressed.
return kVP8NumFramesActive + 1;
}
} // namespace media
......@@ -72,6 +72,7 @@ class MEDIA_GPU_EXPORT VP8Decoder : public AcceleratedVideoDecoder {
DecodeResult Decode() override WARN_UNUSED_RESULT;
gfx::Size GetPicSize() const override;
size_t GetRequiredNumOfPictures() const override;
size_t GetNumReferenceFrames() const override;
private:
bool DecodeAndOutputCurrentFrame(scoped_refptr<VP8Picture> pic);
......
......@@ -261,9 +261,14 @@ gfx::Size VP9Decoder::GetPicSize() const {
}
size_t VP9Decoder::GetRequiredNumOfPictures() const {
// kMaxVideoFrames to keep higher level media pipeline populated, +2 for the
// pictures being parsed and decoded currently.
return limits::kMaxVideoFrames + kVp9NumRefFrames + 2;
constexpr size_t kPicsInPipeline = limits::kMaxVideoFrames + 1;
return kPicsInPipeline + GetNumReferenceFrames();
}
size_t VP9Decoder::GetNumReferenceFrames() const {
// Maximum number of reference frames needed plus one for the one being
// currently egressed.
return kVp9NumRefFrames + 1;
}
} // namespace media
......@@ -106,6 +106,7 @@ class MEDIA_GPU_EXPORT VP9Decoder : public AcceleratedVideoDecoder {
DecodeResult Decode() override WARN_UNUSED_RESULT;
gfx::Size GetPicSize() const override;
size_t GetRequiredNumOfPictures() const override;
size_t GetNumReferenceFrames() const override;
private:
// Update ref_frames_ based on the information in current frame header.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment