Commit 6ddfe0d8 authored by Alexandre Courbot, committed by Commit Bot

media/gpu/v4l2svda: use queue of GL fences

As we are planning to add IP support, GL fences will not always be
associated with decoder buffers - if the IP is in use, then the fence will
refer to the IP buffer and we must thus prevent it from being used for
processing again.

However, GL fence management currently works by waiting on a fence
before queuing a decoder buffer. This CL addresses this by using a FIFO
queue of GL fences which retain their associated decode surface with
them. Since the IP buffer will be tied to the decode surface, this means
it will be kept for as long as required.

This design is similar to what was done in the V4L2VDA to address the
same situation.

Bug: b:132589320
Test: vdaunittest and vdatests pass on Minnie.
Change-Id: I06ba55d995a5ba3bc9fc09fcc4b2358987294eb4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1741431
Commit-Queue: Alexandre Courbot <acourbot@chromium.org>
Reviewed-by: Hirokazu Honda <hiroh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#685456}
parent db912353
......@@ -837,37 +837,6 @@ bool V4L2SliceVideoDecodeAccelerator::EnqueueOutputRecord(
OutputRecord& output_record = output_buffer_map_[index];
DCHECK_NE(output_record.picture_id, -1);
if (output_record.egl_fence) {
TRACE_EVENT0("media,gpu",
"V4L2SVDA::EnqueueOutputRecord: "
"GLFenceEGL::ClientWaitWithTimeoutNanos");
// If we have to wait for completion, wait. Note that free_output_buffers_
// is a FIFO queue, so we always wait on the buffer that has been in the
// queue the longest. Every 100ms we check whether the decoder is shutting
// down, or we might get stuck waiting on a fence that will never come:
// https://crbug.com/845645
while (!IsDestroyPending()) {
const EGLTimeKHR wait_ns =
base::TimeDelta::FromMilliseconds(100).InNanoseconds();
EGLint result =
output_record.egl_fence->ClientWaitWithTimeoutNanos(wait_ns);
if (result == EGL_CONDITION_SATISFIED_KHR) {
break;
} else if (result == EGL_FALSE) {
// This will cause tearing, but is safe otherwise.
DVLOGF(1) << "GLFenceEGL::ClientWaitWithTimeoutNanos failed!";
break;
}
DCHECK_EQ(result, EGL_TIMEOUT_EXPIRED_KHR);
}
if (IsDestroyPending())
return false;
output_record.egl_fence.reset();
}
bool ret;
if (output_mode_ == Config::OutputMode::ALLOCATE)
ret = std::move(output_buffer).QueueMMap();
......@@ -1161,8 +1130,6 @@ bool V4L2SliceVideoDecodeAccelerator::DestroyOutputs(bool dismiss) {
return true;
for (auto& output_record : output_buffer_map_) {
output_record.egl_fence.reset();
picture_buffers_to_dismiss.push_back(output_record.picture_id);
}
......@@ -1198,6 +1165,9 @@ bool V4L2SliceVideoDecodeAccelerator::DestroyOutputBuffers() {
// Release all buffers waiting for an import buffer event.
output_wait_map_.clear();
// Release all buffers awaiting a fence since we are about to destroy them.
surfaces_awaiting_fence_ = {};
// It's ok to do this, client will retain references to textures, but we are
// not interested in reusing the surfaces anymore.
// This will prevent us from reusing old surfaces in case we have some
......@@ -1320,7 +1290,6 @@ void V4L2SliceVideoDecodeAccelerator::AssignPictureBuffersTask(
for (size_t i = 0; i < buffers.size(); i++) {
OutputRecord& output_record = output_buffer_map_[i];
DCHECK(!output_record.egl_fence);
DCHECK_EQ(output_record.picture_id, -1);
DCHECK(output_record.dmabuf_fds.empty());
DCHECK_EQ(output_record.cleared, false);
......@@ -1431,7 +1400,6 @@ void V4L2SliceVideoDecodeAccelerator::AssignDmaBufs(
}
OutputRecord& output_record = output_buffer_map_[buffer_index];
DCHECK(!output_record.egl_fence);
DCHECK_EQ(output_wait_map_.count(picture_buffer_id), 1u);
if (output_mode_ == Config::OutputMode::IMPORT) {
......@@ -1612,7 +1580,8 @@ void V4L2SliceVideoDecodeAccelerator::ReusePictureBufferTask(
if (output_record.num_times_sent_to_client == 0) {
output_record.at_client = false;
// Take ownership of the EGL fence.
output_record.egl_fence = std::move(egl_fence);
surfaces_awaiting_fence_.push(
std::make_pair(std::move(egl_fence), std::move(it->second)));
surfaces_at_display_.erase(it);
}
......@@ -1908,6 +1877,33 @@ void V4L2SliceVideoDecodeAccelerator::OutputSurface(
output_record.cleared = true;
}
// Drops the entries at the head of |surfaces_awaiting_fence_| whose GL fence
// has completed and, if the output queue still has no free buffer afterwards,
// posts a delayed call to DecodeBufferTask() so that fences get polled again.
//
// DCHECKs that it is running on |decoder_thread_task_runner_|.
void V4L2SliceVideoDecodeAccelerator::CheckGLFences() {
  DVLOGF(4);
  DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());

  // The queue is processed strictly in FIFO order: we stop at the first entry
  // whose fence is still pending and do not look at the entries behind it.
  while (!surfaces_awaiting_fence_.empty()) {
    const auto& fence = surfaces_awaiting_fence_.front().first;
    // An entry that carries no fence has nothing to wait for. Treat it as
    // signaled rather than dereferencing a null pointer.
    if (fence && !fence->HasCompleted())
      break;

    // Buffer at the front of the queue goes back to V4L2Queue's free list
    // and can be reused.
    surfaces_awaiting_fence_.pop();
  }

  // If we have no free buffers available, then preemptively schedule a
  // call to DecodeBufferTask() in a short time, otherwise we may starve out
  // of buffers because fences will not call back into us once they are
  // signaled. The delay chosen roughly corresponds to the time a frame is
  // displayed, which should be optimal in most cases.
  if (output_queue_->FreeBuffersCount() == 0) {
    constexpr int64_t kRescheduleDelayMs = 17;

    decoder_thread_.task_runner()->PostDelayedTask(
        FROM_HERE,
        base::BindOnce(&V4L2SliceVideoDecodeAccelerator::DecodeBufferTask,
                       base::Unretained(this)),
        base::TimeDelta::FromMilliseconds(kRescheduleDelayMs));
  }
}
scoped_refptr<V4L2DecodeSurface>
V4L2SliceVideoDecodeAccelerator::CreateSurface() {
DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());
......@@ -1924,12 +1920,17 @@ V4L2SliceVideoDecodeAccelerator::CreateSurface() {
GetNumOfOutputRecordsAtClient(), "at device",
GetNumOfOutputRecordsAtDevice());
// Release some output buffers if their fence has been signaled.
CheckGLFences();
if (input_queue_->FreeBuffersCount() == 0 ||
output_queue_->FreeBuffersCount() == 0)
return nullptr;
V4L2WritableBufferRef input_buffer = input_queue_->GetFreeBuffer();
DCHECK(input_buffer.IsValid());
// All buffers that are returned to the output free queue have their GL
// fence signaled, so we can use them directly.
V4L2WritableBufferRef output_buffer = output_queue_->GetFreeBuffer();
DCHECK(output_buffer.IsValid());
......
......@@ -88,7 +88,6 @@ class MEDIA_GPU_EXPORT V4L2SliceVideoDecodeAccelerator
int32_t picture_id;
GLuint client_texture_id;
GLuint texture_id;
std::unique_ptr<gl::GLFenceEGL> egl_fence;
std::vector<base::ScopedFD> dmabuf_fds;
bool cleared;
};
......@@ -128,6 +127,10 @@ class MEDIA_GPU_EXPORT V4L2SliceVideoDecodeAccelerator
// Below methods are used by accelerator implementations.
//
// V4L2DecodeSurfaceHandler implementation.
// Release surfaces waiting for their fence to be signaled.
void CheckGLFences();
scoped_refptr<V4L2DecodeSurface> CreateSurface() override;
// SurfaceReady() uses |decoder_display_queue_| to guarantee that decoding
// of |dec_surface| happens in order.
......@@ -430,6 +433,12 @@ class MEDIA_GPU_EXPORT V4L2SliceVideoDecodeAccelerator
std::map<int32_t, scoped_refptr<V4L2DecodeSurface>>;
V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;
// Queue of surfaces that have been returned by the client, but whose fence
// hasn't been signaled yet.
std::queue<std::pair<std::unique_ptr<gl::GLFenceEGL>,
scoped_refptr<V4L2DecodeSurface>>>
surfaces_awaiting_fence_;
// Record for decoded pictures that can be sent to PictureReady.
struct PictureRecord {
PictureRecord(bool cleared, const Picture& picture);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment