Commit d12004dc authored by Miguel Casas's avatar Miguel Casas Committed by Commit Bot

media/gpu/vaapi: manage VP9 VABufferIDs lifetime in the decoder

Decoding a bitstream in VA has two steps: one, submitting the parsed
parameters and encoded chunk as VABufferIDs, and two, executing the
decode. This CL transfers managing the lifetime of the VABufferIDs to
VP9VaapiVideoDecoderDelegate, avoiding vaCreateBuffer() calls.

To do that this CL reshuffles a bit the VaapiWrapper calls to be able
to call vaCreateBuffer(), MapAndCopy and Execute, in a piecemeal,
configurable fashion.

This is verified via chrome:tracing and codepen.io/full/RwarYvG that
plays 4 1280x572 VP9 videos at the same time. Tracing is captured for
a few seconds, and basically we avoid calling the vaCreateBuffer(),
which takes ~50us every time, reducing the overall decode time from
~3.740ms to ~3.425ms on my BSW (reks), ~10% reduction. (Also tried on
kohaku and numbers are similar albeit smaller bc the SoC is faster).
Improvements are of course extremely small, the advantages of this CL
are in reducing lock/unlock churn and associated contention. This
benefit grows with the amount of decodes (e.g. Meet grid scenarios).

This CL also removes the unused VaapiWrapper::Execute() method.

Bug: b/166646505
Change-Id: Id44dbb21825328dfb4fc47e1c7807bd725efd196
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2404740
Commit-Queue: Miguel Casas <mcasas@chromium.org>
Reviewed-by: default avatarAndres Calderon Jaramillo <andrescj@chromium.org>
Cr-Commit-Position: refs/heads/master@{#808040}
parent 17901dce
......@@ -491,6 +491,11 @@ void VaapiVideoDecodeAccelerator::DecodeTask() {
required_num_of_pictures =
std::max(kMinNumOfPics, required_num_of_pictures);
}
// Notify |decoder_delegate_| of an imminent VAContextID destruction, so
// it can destroy any internal structures making use of it.
decoder_delegate_->OnVAContextDestructionSoon();
task_runner_->PostTask(
FROM_HERE,
base::BindOnce(
......@@ -605,9 +610,8 @@ void VaapiVideoDecodeAccelerator::TryFinishSurfaceSetChange() {
// All surfaces released, destroy them and dismiss all PictureBuffers.
awaiting_va_surfaces_recycle_ = false;
VideoCodecProfile new_profile = decoder_->GetProfile();
const VideoCodecProfile new_profile = decoder_->GetProfile();
if (profile_ != new_profile) {
DCHECK(decoder_delegate_);
profile_ = new_profile;
auto new_vaapi_wrapper = VaapiWrapper::CreateForVideoCodec(
VaapiWrapper::kDecode, profile_,
......@@ -1060,6 +1064,10 @@ void VaapiVideoDecodeAccelerator::Cleanup() {
if (buffer_allocation_mode_ != BufferAllocationMode::kNone)
available_va_surfaces_.clear();
// Notify |decoder_delegate_| of an imminent VAContextID destruction, so it
// can destroy any internal structures making use of it. At this point
// |decoder_thread_| is stopped so we can access these from |task_runner_|.
decoder_delegate_->OnVAContextDestructionSoon();
vaapi_wrapper_->DestroyContext();
if (vpp_vaapi_wrapper_)
......
......@@ -12,6 +12,7 @@
#include "media/gpu/accelerated_video_decoder.h"
#include "media/gpu/vaapi/vaapi_picture.h"
#include "media/gpu/vaapi/vaapi_picture_factory.h"
#include "media/gpu/vaapi/vaapi_video_decoder_delegate.h"
#include "media/gpu/vaapi/vaapi_wrapper.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
......@@ -167,8 +168,12 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
vda_.decoder_thread_task_runner_ = decoder_thread_.task_runner();
decoder_delegate_ =
std::make_unique<VaapiVideoDecoderDelegate>(&vda_, mock_vaapi_wrapper_);
// Plug in all the mocks and ourselves as the |client_|.
vda_.decoder_.reset(mock_decoder_);
vda_.decoder_delegate_ = decoder_delegate_.get();
vda_.client_ = weak_ptr_factory_.GetWeakPtr();
vda_.vaapi_wrapper_ = mock_vaapi_wrapper_;
vda_.vpp_vaapi_wrapper_ = mock_vpp_vaapi_wrapper_;
......@@ -377,6 +382,8 @@ class VaapiVideoDecodeAcceleratorTest : public TestWithParam<TestParams>,
base::test::TaskEnvironment task_environment_;
std::unique_ptr<VaapiVideoDecoderDelegate> decoder_delegate_;
// The class under test and a worker thread for it.
VaapiVideoDecodeAccelerator vda_;
base::Thread decoder_thread_;
......
......@@ -100,6 +100,11 @@ VaapiVideoDecoder::~VaapiVideoDecoder() {
weak_this_factory_.InvalidateWeakPtrs();
// Notify |decoder_delegate_| of an imminent VAContextID destruction, so it
// can destroy any internal structures making use of it.
if (decoder_delegate_)
decoder_delegate_->OnVAContextDestructionSoon();
// Destroy explicitly to DCHECK() that |vaapi_wrapper_| references are held
// inside the accelerator in |decoder_|, by the |allocated_va_surfaces_| and
// of course by this class. To clear |allocated_va_surfaces_| we have to first
......@@ -138,6 +143,10 @@ void VaapiVideoDecoder::Initialize(const VideoDecoderConfig& config,
if (state_ != State::kUninitialized) {
DVLOGF(3) << "Reinitializing decoder";
// Notify |decoder_delegate_| of an imminent VAContextID destruction, so it
// can destroy any internal structures making use of it.
decoder_delegate_->OnVAContextDestructionSoon();
decoder_ = nullptr;
DCHECK(vaapi_wrapper_);
// To clear |allocated_va_surfaces_| we have to first DestroyContext().
......@@ -151,7 +160,7 @@ void VaapiVideoDecoder::Initialize(const VideoDecoderConfig& config,
}
// Initialize VAAPI wrapper.
VideoCodecProfile profile = config.profile();
const VideoCodecProfile profile = config.profile();
vaapi_wrapper_ = VaapiWrapper::CreateForVideoCodec(
VaapiWrapper::kDecode, profile,
base::Bind(&ReportVaapiErrorToUMA, "Media.VaapiVideoDecoder.VAAPIError"));
......@@ -452,6 +461,10 @@ void VaapiVideoDecoder::ApplyResolutionChange() {
return;
}
// Notify |decoder_delegate_| of an imminent VAContextID destruction, so it
// can destroy any internal structures making use of it.
decoder_delegate_->OnVAContextDestructionSoon();
// All pending decode operations will be completed before triggering a
// resolution change, so we can safely DestroyContext() here; that, in turn,
// allows for clearing the |allocated_va_surfaces_|.
......@@ -554,8 +567,12 @@ void VaapiVideoDecoder::Reset(base::OnceClosure reset_cb) {
}
if (state_ == State::kChangingResolution) {
// If we reset during resolution change, re-create AVD. Then the new AVD
// will trigger resolution change again after reset.
// Recreate |decoder_| and |decoder_delegate_| if we are Reset() in the
// interim between calling |client_|s PrepareChangeResolution() and being
// called back on ApplyResolutionChange(), so the latter will find a fresh
// |decoder_|. Also give a chance to |decoder_delegate_| to release its
// internal data structures.
decoder_delegate_->OnVAContextDestructionSoon();
if (!CreateAcceleratedVideoDecoder().is_ok()) {
SetState(State::kError);
std::move(reset_cb).Run();
......
......@@ -30,4 +30,6 @@ void VaapiVideoDecoderDelegate::set_vaapi_wrapper(
vaapi_wrapper_ = std::move(vaapi_wrapper);
}
void VaapiVideoDecoderDelegate::OnVAContextDestructionSoon() {}
} // namespace media
......@@ -25,6 +25,7 @@ class VaapiVideoDecoderDelegate {
virtual ~VaapiVideoDecoderDelegate();
void set_vaapi_wrapper(scoped_refptr<VaapiWrapper> vaapi_wrapper);
virtual void OnVAContextDestructionSoon();
VaapiVideoDecoderDelegate(const VaapiVideoDecoderDelegate&) = delete;
VaapiVideoDecoderDelegate& operator=(const VaapiVideoDecoderDelegate&) =
......
......@@ -1860,11 +1860,34 @@ void VaapiWrapper::DestroyPendingBuffers_Locked() {
bool VaapiWrapper::ExecuteAndDestroyPendingBuffers(VASurfaceID va_surface_id) {
base::AutoLock auto_lock(*va_lock_);
bool result = Execute_Locked(va_surface_id);
bool result = Execute_Locked(va_surface_id, pending_va_buffers_);
DestroyPendingBuffers_Locked();
return result;
}
bool VaapiWrapper::MapAndCopyAndExecute(
VASurfaceID va_surface_id,
const std::vector<std::pair<VABufferID, VABufferDescriptor>>& va_buffers) {
DCHECK_NE(va_surface_id, VA_INVALID_SURFACE);
TRACE_EVENT0("media,gpu", "VaapiWrapper::MapAndCopyAndExecute");
base::AutoLock auto_lock(*va_lock_);
std::vector<VABufferID> va_buffer_ids;
for (const auto& va_buffer : va_buffers) {
const VABufferID va_buffer_id = va_buffer.first;
const VABufferDescriptor& descriptor = va_buffer.second;
DCHECK_NE(va_buffer_id, VA_INVALID_ID);
if (!MapAndCopy_Locked(va_buffer_id, descriptor))
return false;
va_buffer_ids.push_back(va_buffer_id);
}
return Execute_Locked(va_surface_id, va_buffer_ids);
}
#if defined(USE_X11)
bool VaapiWrapper::PutSurfaceIntoPixmap(VASurfaceID va_surface_id,
Pixmap x_pixmap,
......@@ -2434,13 +2457,8 @@ void VaapiWrapper::DestroySurface(VASurfaceID va_surface_id) {
VA_LOG_ON_ERROR(va_res, VaapiFunctions::kVADestroySurfaces);
}
bool VaapiWrapper::Execute(VASurfaceID va_surface_id) {
TRACE_EVENT0("media,gpu", "VaapiWrapper::Execute");
base::AutoLock auto_lock(*va_lock_);
return Execute_Locked(va_surface_id);
}
bool VaapiWrapper::Execute_Locked(VASurfaceID va_surface_id) {
bool VaapiWrapper::Execute_Locked(VASurfaceID va_surface_id,
const std::vector<VABufferID>& va_buffers) {
TRACE_EVENT0("media,gpu", "VaapiWrapper::Execute_Locked");
va_lock_->AssertAcquired();
......@@ -2452,11 +2470,11 @@ bool VaapiWrapper::Execute_Locked(VASurfaceID va_surface_id) {
VAStatus va_res = vaBeginPicture(va_display_, va_context_id_, va_surface_id);
VA_SUCCESS_OR_RETURN(va_res, VaapiFunctions::kVABeginPicture, false);
if (pending_va_buffers_.size() > 0) {
// Commit parameter and slice buffers.
va_res =
vaRenderPicture(va_display_, va_context_id_, &pending_va_buffers_[0],
pending_va_buffers_.size());
if (!va_buffers.empty()) {
// vaRenderPicture() needs a non-const pointer, possibly unnecessarily.
VABufferID* va_buffers_data = const_cast<VABufferID*>(va_buffers.data());
va_res = vaRenderPicture(va_display_, va_context_id_, va_buffers_data,
base::checked_cast<int>(va_buffers.size()));
VA_SUCCESS_OR_RETURN(va_res, VaapiFunctions::kVARenderPicture_VABuffers,
false);
}
......@@ -2496,16 +2514,28 @@ bool VaapiWrapper::SubmitBuffer_Locked(const VABufferDescriptor& va_buffer) {
VA_SUCCESS_OR_RETURN(va_res, VaapiFunctions::kVACreateBuffer, false);
}
if (!MapAndCopy_Locked(buffer_id, va_buffer))
return false;
pending_va_buffers_.push_back(buffer_id);
return true;
}
bool VaapiWrapper::MapAndCopy_Locked(VABufferID va_buffer_id,
const VABufferDescriptor& va_buffer) {
va_lock_->AssertAcquired();
DCHECK_NE(va_buffer_id, VA_INVALID_ID);
DCHECK_LT(va_buffer.type, VABufferTypeMax);
DCHECK(va_buffer.data);
ScopedVABufferMapping mapping(
va_lock_, va_display_, buffer_id,
va_lock_, va_display_, va_buffer_id,
base::BindOnce(base::IgnoreResult(&vaDestroyBuffer), va_display_));
if (!mapping.IsValid())
return false;
memcpy(mapping.data(), va_buffer.data, va_buffer.size);
pending_va_buffers_.push_back(buffer_id);
return true;
return memcpy(mapping.data(), va_buffer.data, va_buffer.size);
}
} // namespace media
......@@ -331,14 +331,20 @@ class MEDIA_GPU_EXPORT VaapiWrapper
size_t size,
const void* buffer);
// Cancel and destroy all buffers queued to the HW codec via SubmitBuffer().
// Useful when a pending job is to be cancelled (on reset or error).
// Destroys all |pending_va_buffers_| sent via SubmitBuffer*(). Useful when a
// pending job is to be cancelled (on reset or error).
void DestroyPendingBuffers();
// Executes job in hardware on target |va_surface_id| and destroys pending
// buffers. Returns false if Execute() fails.
virtual bool ExecuteAndDestroyPendingBuffers(VASurfaceID va_surface_id);
// Maps each |va_buffers| ID and copies the data described by the associated
// VABufferDescriptor into it; then calls Execute_Locked() on |va_surface_id|.
bool MapAndCopyAndExecute(
VASurfaceID va_surface_id,
const std::vector<std::pair<VABufferID, VABufferDescriptor>>& va_buffers);
#if defined(USE_X11)
// Put data from |va_surface_id| into |x_pixmap| of size
// |dest_size|, converting/scaling to it.
......@@ -437,20 +443,25 @@ class MEDIA_GPU_EXPORT VaapiWrapper
size_t num_surfaces,
std::vector<VASurfaceID>* va_surfaces);
// Execute pending job in hardware and destroy pending buffers. Return false
// if vaapi driver refuses to accept parameter or slice buffers submitted
// by client, or if execution fails in hardware.
bool Execute(VASurfaceID va_surface_id);
bool Execute_Locked(VASurfaceID va_surface_id)
// Carries out the vaBeginPicture()-vaRenderPicture()-vaEndPicture() on target
// |va_surface_id|. Returns false if any of these calls fails.
bool Execute_Locked(VASurfaceID va_surface_id,
const std::vector<VABufferID>& va_buffers)
EXCLUSIVE_LOCKS_REQUIRED(va_lock_);
void DestroyPendingBuffers_Locked() EXCLUSIVE_LOCKS_REQUIRED(va_lock_);
// Requests libva to allocate a new VABufferID of type |va_buffer.type|, maps
// it and copies |va_buffer.size| contents of |va_buffer.data| to it.
// Requests libva to allocate a new VABufferID of type |va_buffer.type|, then
// maps-and-copies |va_buffer.size| contents of |va_buffer.data| to it.
bool SubmitBuffer_Locked(const VABufferDescriptor& va_buffer)
EXCLUSIVE_LOCKS_REQUIRED(va_lock_);
// Maps |va_buffer_id| and, if successful, copies the contents of |va_buffer|
// into it.
bool MapAndCopy_Locked(VABufferID va_buffer_id,
const VABufferDescriptor& va_buffer)
EXCLUSIVE_LOCKS_REQUIRED(va_lock_);
const CodecMode mode_;
// Pointer to VADisplayState's member |va_lock_|. Guaranteed to be valid for
......
......@@ -21,7 +21,11 @@ VP9VaapiVideoDecoderDelegate::VP9VaapiVideoDecoderDelegate(
scoped_refptr<VaapiWrapper> vaapi_wrapper)
: VaapiVideoDecoderDelegate(vaapi_dec, std::move(vaapi_wrapper)) {}
VP9VaapiVideoDecoderDelegate::~VP9VaapiVideoDecoderDelegate() = default;
VP9VaapiVideoDecoderDelegate::~VP9VaapiVideoDecoderDelegate() {
DCHECK(!picture_params_);
DCHECK(!slice_params_);
DCHECK(!encoded_data_);
}
scoped_refptr<VP9Picture> VP9VaapiVideoDecoderDelegate::CreateVP9Picture() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
......@@ -43,12 +47,33 @@ bool VP9VaapiVideoDecoderDelegate::SubmitDecode(
// |done_cb| should be null as we return false from IsFrameContextRequired().
DCHECK(!done_cb);
VADecPictureParameterBufferVP9 pic_param;
memset(&pic_param, 0, sizeof(pic_param));
const Vp9FrameHeader* frame_hdr = pic->frame_hdr.get();
DCHECK(frame_hdr);
VADecPictureParameterBufferVP9 pic_param{};
VASliceParameterBufferVP9 slice_param{};
if (!picture_params_) {
picture_params_ = vaapi_wrapper_->CreateVABuffer(
VAPictureParameterBufferType, sizeof(pic_param));
if (!picture_params_)
return false;
}
if (!slice_params_) {
slice_params_ = vaapi_wrapper_->CreateVABuffer(VASliceParameterBufferType,
sizeof(slice_param));
if (!slice_params_)
return false;
}
// |encoded_data_| has to match perfectly |frame_hdr->frame_size| or decoding
// will have horrific artifacts.
if (!encoded_data_ || encoded_data_->size() != frame_hdr->frame_size) {
encoded_data_ = vaapi_wrapper_->CreateVABuffer(VASliceDataBufferType,
frame_hdr->frame_size);
if (!encoded_data_)
return false;
}
pic_param.frame_width = base::checked_cast<uint16_t>(frame_hdr->frame_width);
pic_param.frame_height =
base::checked_cast<uint16_t>(frame_hdr->frame_height);
......@@ -108,8 +133,6 @@ bool VP9VaapiVideoDecoderDelegate::SubmitDecode(
DCHECK((pic_param.profile == 0 && pic_param.bit_depth == 8) ||
(pic_param.profile == 2 && pic_param.bit_depth == 10));
VASliceParameterBufferVP9 slice_param;
memset(&slice_param, 0, sizeof(slice_param));
slice_param.slice_data_size = frame_hdr->frame_size;
slice_param.slice_data_offset = 0;
slice_param.slice_data_flag = VA_SLICE_DATA_FLAG_ALL;
......@@ -138,15 +161,14 @@ bool VP9VaapiVideoDecoderDelegate::SubmitDecode(
seg_param.chroma_ac_quant_scale = seg.uv_dequant[i][1];
}
if (!vaapi_wrapper_->SubmitBuffers(
{{VAPictureParameterBufferType, sizeof(pic_param), &pic_param},
{VASliceParameterBufferType, sizeof(slice_param), &slice_param},
{VASliceDataBufferType, frame_hdr->frame_size, frame_hdr->data}})) {
return false;
}
return vaapi_wrapper_->ExecuteAndDestroyPendingBuffers(
pic->AsVaapiVP9Picture()->va_surface()->id());
return vaapi_wrapper_->MapAndCopyAndExecute(
pic->AsVaapiVP9Picture()->va_surface()->id(),
{{picture_params_->id(),
{picture_params_->type(), picture_params_->size(), &pic_param}},
{slice_params_->id(),
{slice_params_->type(), slice_params_->size(), &slice_param}},
{encoded_data_->id(),
{encoded_data_->type(), frame_hdr->frame_size, frame_hdr->data}}});
}
bool VP9VaapiVideoDecoderDelegate::OutputPicture(
......@@ -172,4 +194,12 @@ bool VP9VaapiVideoDecoderDelegate::GetFrameContext(
return false;
}
void VP9VaapiVideoDecoderDelegate::OnVAContextDestructionSoon() {
// Destroy the member ScopedVABuffers below since they refer to a VAContextID
// that will be destroyed soon.
picture_params_.reset();
slice_params_.reset();
encoded_data_.reset();
}
} // namespace media
......@@ -13,6 +13,7 @@
namespace media {
class ScopedVABuffer;
class VP9Picture;
class VP9VaapiVideoDecoderDelegate : public VP9Decoder::VP9Accelerator,
......@@ -35,6 +36,14 @@ class VP9VaapiVideoDecoderDelegate : public VP9Decoder::VP9Accelerator,
bool GetFrameContext(scoped_refptr<VP9Picture> pic,
Vp9FrameContext* frame_ctx) override;
// VaapiVideoDecoderDelegate impl.
void OnVAContextDestructionSoon() override;
private:
std::unique_ptr<ScopedVABuffer> picture_params_;
std::unique_ptr<ScopedVABuffer> slice_params_;
std::unique_ptr<ScopedVABuffer> encoded_data_;
DISALLOW_COPY_AND_ASSIGN(VP9VaapiVideoDecoderDelegate);
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment