Commit 8a5df4f7 authored by Alexandre Courbot, committed by Commit Bot

media/gpu/v4l2svda: manage requests as a FIFO queue

We used to strongly associate requests with input buffers, but doing so is
sub-optimal: a request may not be completed even if its associated input
buffer has been dequeued, meaning we cannot reuse the request just yet.

This CL addresses this by dissociating requests from input buffers and
putting them into a FIFO queue: when constructing a
V4L2RequestDecodeSurface, we always take the least recently used request,
maximizing its odds of being completed at the time.

Bug: 917279
Test: VDA unittest passing on Kukui with required IP changes.
Change-Id: I3e84c1d9120cdded3876fa8f4a91fad964563ac5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1731011
Commit-Queue: Alexandre Courbot <acourbot@chromium.org>
Reviewed-by: Hirokazu Honda <hiroh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#683532}
parent 95f9b23d
...@@ -76,8 +76,6 @@ const uint32_t V4L2SliceVideoDecodeAccelerator::supported_input_fourccs_[] = { ...@@ -76,8 +76,6 @@ const uint32_t V4L2SliceVideoDecodeAccelerator::supported_input_fourccs_[] = {
V4L2_PIX_FMT_H264_SLICE, V4L2_PIX_FMT_VP8_FRAME, V4L2_PIX_FMT_VP9_FRAME, V4L2_PIX_FMT_H264_SLICE, V4L2_PIX_FMT_VP8_FRAME, V4L2_PIX_FMT_VP9_FRAME,
}; };
V4L2SliceVideoDecodeAccelerator::InputRecord::InputRecord() : request_fd(-1) {}
V4L2SliceVideoDecodeAccelerator::OutputRecord::OutputRecord() V4L2SliceVideoDecodeAccelerator::OutputRecord::OutputRecord()
: at_client(false), : at_client(false),
num_times_sent_to_client(0), num_times_sent_to_client(0),
...@@ -168,7 +166,7 @@ V4L2SliceVideoDecodeAccelerator::~V4L2SliceVideoDecodeAccelerator() { ...@@ -168,7 +166,7 @@ V4L2SliceVideoDecodeAccelerator::~V4L2SliceVideoDecodeAccelerator() {
DCHECK(!decoder_thread_.IsRunning()); DCHECK(!decoder_thread_.IsRunning());
DCHECK(!device_poll_thread_.IsRunning()); DCHECK(!device_poll_thread_.IsRunning());
DCHECK(input_buffer_map_.empty()); DCHECK(requests_.empty());
DCHECK(output_buffer_map_.empty()); DCHECK(output_buffer_map_.empty());
} }
...@@ -478,7 +476,6 @@ bool V4L2SliceVideoDecodeAccelerator::CreateInputBuffers() { ...@@ -478,7 +476,6 @@ bool V4L2SliceVideoDecodeAccelerator::CreateInputBuffers() {
VLOGF(2); VLOGF(2);
DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread()); DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());
DCHECK(!input_queue_->IsStreaming()); DCHECK(!input_queue_->IsStreaming());
DCHECK(input_buffer_map_.empty());
if (input_queue_->AllocateBuffers(kNumInputBuffers, V4L2_MEMORY_MMAP) < if (input_queue_->AllocateBuffers(kNumInputBuffers, V4L2_MEMORY_MMAP) <
kNumInputBuffers) { kNumInputBuffers) {
...@@ -486,14 +483,14 @@ bool V4L2SliceVideoDecodeAccelerator::CreateInputBuffers() { ...@@ -486,14 +483,14 @@ bool V4L2SliceVideoDecodeAccelerator::CreateInputBuffers() {
return false; return false;
} }
input_buffer_map_.resize(input_queue_->AllocatedBuffersCount());
// The remainder of this method only applies if requests are used. // The remainder of this method only applies if requests are used.
if (!supports_requests_) if (!supports_requests_)
return true; return true;
DCHECK(requests_.empty());
DCHECK(media_fd_.is_valid()); DCHECK(media_fd_.is_valid());
for (auto& input_record : input_buffer_map_) { for (size_t i = 0; i < input_queue_->AllocatedBuffersCount(); i++) {
int request_fd; int request_fd;
int ret = HANDLE_EINTR( int ret = HANDLE_EINTR(
...@@ -502,8 +499,10 @@ bool V4L2SliceVideoDecodeAccelerator::CreateInputBuffers() { ...@@ -502,8 +499,10 @@ bool V4L2SliceVideoDecodeAccelerator::CreateInputBuffers() {
VPLOGF(1) << "Failed to create request: "; VPLOGF(1) << "Failed to create request: ";
return false; return false;
} }
input_record.request_fd = base::ScopedFD(request_fd);
requests_.push(base::ScopedFD(request_fd));
} }
DCHECK_EQ(requests_.size(), input_queue_->AllocatedBuffersCount());
return true; return true;
} }
...@@ -583,12 +582,10 @@ void V4L2SliceVideoDecodeAccelerator::DestroyInputBuffers() { ...@@ -583,12 +582,10 @@ void V4L2SliceVideoDecodeAccelerator::DestroyInputBuffers() {
DCHECK(!input_queue_->IsStreaming()); DCHECK(!input_queue_->IsStreaming());
if (input_buffer_map_.empty())
return;
input_queue_->DeallocateBuffers(); input_queue_->DeallocateBuffers();
input_buffer_map_.clear(); if (supports_requests_)
requests_ = {};
} }
void V4L2SliceVideoDecodeAccelerator::DismissPictures( void V4L2SliceVideoDecodeAccelerator::DismissPictures(
...@@ -664,7 +661,7 @@ void V4L2SliceVideoDecodeAccelerator::SchedulePollIfNeeded() { ...@@ -664,7 +661,7 @@ void V4L2SliceVideoDecodeAccelerator::SchedulePollIfNeeded() {
<< "INPUT[" << decoder_input_queue_.size() << "]" << "INPUT[" << decoder_input_queue_.size() << "]"
<< " => DEVICE[" << input_queue_->FreeBuffersCount() << "+" << " => DEVICE[" << input_queue_->FreeBuffersCount() << "+"
<< input_queue_->QueuedBuffersCount() << "/" << input_queue_->QueuedBuffersCount() << "/"
<< input_buffer_map_.size() << "]->[" << input_queue_->AllocatedBuffersCount() << "]->["
<< output_queue_->FreeBuffersCount() << "+" << output_queue_->FreeBuffersCount() << "+"
<< output_queue_->QueuedBuffersCount() << "/" << output_queue_->QueuedBuffersCount() << "/"
<< output_buffer_map_.size() << "]" << output_buffer_map_.size() << "]"
...@@ -1927,14 +1924,17 @@ V4L2SliceVideoDecodeAccelerator::CreateSurface() { ...@@ -1927,14 +1924,17 @@ V4L2SliceVideoDecodeAccelerator::CreateSurface() {
int input = input_buffer.BufferId(); int input = input_buffer.BufferId();
int output = output_buffer.BufferId(); int output = output_buffer.BufferId();
InputRecord& input_record = input_buffer_map_[input];
scoped_refptr<V4L2DecodeSurface> dec_surface; scoped_refptr<V4L2DecodeSurface> dec_surface;
if (supports_requests_) { if (supports_requests_) {
auto ret = V4L2RequestDecodeSurface::Create( // Here we just borrow the older request to use it, before
std::move(input_buffer), std::move(output_buffer), nullptr, // immediately putting it back at the back of the queue.
input_record.request_fd.get()); base::ScopedFD request = std::move(requests_.front());
requests_.pop();
auto ret = V4L2RequestDecodeSurface::Create(std::move(input_buffer),
std::move(output_buffer),
nullptr, request.get());
requests_.push(std::move(request));
if (!ret) if (!ret)
return nullptr; return nullptr;
...@@ -2044,7 +2044,8 @@ bool V4L2SliceVideoDecodeAccelerator::OnMemoryDump( ...@@ -2044,7 +2044,8 @@ bool V4L2SliceVideoDecodeAccelerator::OnMemoryDump(
DCHECK(decoder_thread_.task_runner()->BelongsToCurrentThread()); DCHECK(decoder_thread_.task_runner()->BelongsToCurrentThread());
// VIDEO_OUTPUT queue's memory usage. // VIDEO_OUTPUT queue's memory usage.
const size_t input_queue_buffers_count = input_buffer_map_.size(); const size_t input_queue_buffers_count =
input_queue_->AllocatedBuffersCount();
size_t input_queue_memory_usage = 0; size_t input_queue_memory_usage = 0;
std::string input_queue_buffers_memory_type = std::string input_queue_buffers_memory_type =
V4L2Device::V4L2MemoryToString(V4L2_MEMORY_MMAP); V4L2Device::V4L2MemoryToString(V4L2_MEMORY_MMAP);
......
...@@ -77,12 +77,6 @@ class MEDIA_GPU_EXPORT V4L2SliceVideoDecodeAccelerator ...@@ -77,12 +77,6 @@ class MEDIA_GPU_EXPORT V4L2SliceVideoDecodeAccelerator
base::trace_event::ProcessMemoryDump* pmd) override; base::trace_event::ProcessMemoryDump* pmd) override;
private: private:
// Record for input buffers.
struct InputRecord {
InputRecord();
// Request fd used for this input buffer if request API is used.
base::ScopedFD request_fd;
};
// Record for output buffers. // Record for output buffers.
struct OutputRecord { struct OutputRecord {
...@@ -375,8 +369,6 @@ class MEDIA_GPU_EXPORT V4L2SliceVideoDecodeAccelerator ...@@ -375,8 +369,6 @@ class MEDIA_GPU_EXPORT V4L2SliceVideoDecodeAccelerator
base::Thread device_poll_thread_; base::Thread device_poll_thread_;
scoped_refptr<V4L2Queue> input_queue_; scoped_refptr<V4L2Queue> input_queue_;
// Mapping of int index to an input buffer record.
std::vector<InputRecord> input_buffer_map_;
// Set to true by CreateInputBuffers() if the codec driver supports requests // Set to true by CreateInputBuffers() if the codec driver supports requests
bool supports_requests_ = false; bool supports_requests_ = false;
// Stores the media file descriptor if request API is used // Stores the media file descriptor if request API is used
...@@ -388,6 +380,8 @@ class MEDIA_GPU_EXPORT V4L2SliceVideoDecodeAccelerator ...@@ -388,6 +380,8 @@ class MEDIA_GPU_EXPORT V4L2SliceVideoDecodeAccelerator
std::map<int32_t, V4L2WritableBufferRef> output_wait_map_; std::map<int32_t, V4L2WritableBufferRef> output_wait_map_;
// Mapping of int index to an output buffer record. // Mapping of int index to an output buffer record.
std::vector<OutputRecord> output_buffer_map_; std::vector<OutputRecord> output_buffer_map_;
// FIFO queue of requests, only used if supports_requests_ == true.
std::queue<base::ScopedFD> requests_;
VideoCodecProfile video_profile_; VideoCodecProfile video_profile_;
uint32_t input_format_fourcc_; uint32_t input_format_fourcc_;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment