Video capture with GpuMemoryBuffer - RTCVideoEncoder

This CL enables VEA native input mode in RTCVideoEncoder when the input frame is backed by GpuMemoryBuffer. To support frame scaling for simulcast use cases, we also extend VAAPI video encoder to scale the input frame with VAAPI video post-processing. Bug: 982201 Test: Manually test on DUT with Hangouts Meet. Change-Id: I8f68f492f07d7d8fbdb2686d611cc045ab4f38e2 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1716023 Reviewed-by: Dan Sanders <sandersd@chromium.org> Reviewed-by: Kenneth Russell <kbr@chromium.org> Reviewed-by: Hirokazu Honda <hiroh@chromium.org> Commit-Queue: Ricky Liang <jcliang@chromium.org> Cr-Commit-Position: refs/heads/master@{#703580}

Video capture with GpuMemoryBuffer - RTCVideoEncoder
This CL enables VEA native input mode in RTCVideoEncoder when the input frame is backed by GpuMemoryBuffer. To support frame scaling for simulcast use cases, we also extend VAAPI video encoder to scale the input frame with VAAPI video post-processing. Bug: 982201 Test: Manually test on DUT with Hangouts Meet. Change-Id: I8f68f492f07d7d8fbdb2686d611cc045ab4f38e2 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1716023 Reviewed-by: Dan Sanders <sandersd@chromium.org> Reviewed-by: Kenneth Russell <kbr@chromium.org> Reviewed-by: Hirokazu Honda <hiroh@chromium.org> Commit-Queue: Ricky Liang <jcliang@chromium.org> Cr-Commit-Position: refs/heads/master@{#703580}
c1516068 · Ricky Liang · Commit Bot · 40d903c1 · c1516068 · c1516068
Commit c1516068 authored Oct 08, 2019 by Ricky Liang Committed by Commit Bot Oct 08, 2019
6 changed files
--- a/content/browser/renderer_host/render_process_host_impl.cc
+++ b/content/browser/renderer_host/render_process_host_impl.cc
@@ -3146,6 +3146,7 @@ void RenderProcessHostImpl::PropagateBrowserCommandLineToRenderer(
    switches::kUseGpuInTests,
    switches::kUseMobileUserAgent,
    switches::kV,
+    switches::kVideoCaptureUseGpuMemoryBuffer,
    switches::kVideoThreads,
    switches::kVideoUnderflowThresholdMs,
    switches::kVModule,

--- a/media/gpu/vaapi/vaapi_video_encode_accelerator.cc
+++ b/media/gpu/vaapi/vaapi_video_encode_accelerator.cc
@@ -399,9 +399,14 @@ void VaapiVideoEncodeAccelerator::InitializeTask(const Config& config) {
      base::BindRepeating(&VaapiVideoEncodeAccelerator::RecycleVASurfaceID,
                          base::Unretained(this)));

+  // In native input mode, an extra input surface is needed only if scaling
+  // is required. Since we cannot tell here whether scaling will be needed,
+  // we always allocate input surfaces, which is redundant when it is not.
+  //
+  // TODO(hiroh): Think about moving this surface creation into the first
+  // Encode().
  va_surfaces_per_video_frame_ =
-      kNumSurfacesForOutputPicture +
-      (native_input_mode_ ? 0 : kNumSurfacesPerInputVideoFrame);
+      kNumSurfacesForOutputPicture + kNumSurfacesPerInputVideoFrame;

  if (!vaapi_wrapper_->CreateContextAndSurfaces(
          kVaSurfaceFormat, aligned_input_size_,
@@ -583,12 +588,46 @@ scoped_refptr<VaapiEncodeJob> VaapiVideoEncodeAccelerator::CreateEncodeJob(
      return nullptr;
    }
  } else {
+    if (aligned_input_size_ != frame->coded_size()) {
+      NOTIFY_ERROR(kPlatformFailureError,
+                   "Expected frame size: " << aligned_input_size_.ToString()
+                                           << ", but got: "
+                                           << frame->coded_size().ToString());
+      return nullptr;
+    }
    input_surface =
        new VASurface(available_va_surface_ids_.back(), aligned_input_size_,
                      kVaSurfaceFormat, base::BindOnce(va_surface_release_cb_));
    available_va_surface_ids_.pop_back();
  }

+  if (aligned_input_size_ !=
+      gfx::Size(frame->stride(0), frame->coded_size().height())) {
+    // Do scaling.  Here the buffer size contained in |input_surface| is
+    // |frame->coded_size()|.
+    if (!vpp_vaapi_wrapper_) {
+      vpp_vaapi_wrapper_ =
+          VaapiWrapper::Create(VaapiWrapper::kVideoProcess, VAProfileNone,
+                               base::Bind(&ReportToUMA, VAAPI_ERROR));
+      if (!vpp_vaapi_wrapper_) {
+        NOTIFY_ERROR(kPlatformFailureError,
+                     "Failed to initialize VppVaapiWrapper");
+        return nullptr;
+      }
+    }
+    scoped_refptr<VASurface> scaled_surface =
+        new VASurface(available_va_surface_ids_.back(), aligned_input_size_,
+                      kVaSurfaceFormat, base::BindOnce(va_surface_release_cb_));
+    available_va_surface_ids_.pop_back();
+    // Scale frame->coded_size() -> |aligned_input_size_| here.
+    vpp_vaapi_wrapper_->BlitSurface(input_surface, scaled_surface);
+    // We can destroy the original |input_surface| because the buffer is alive
+    // as long as |frame| is alive.
+    input_surface = std::move(scaled_surface);
+  }
+
+  // Here, the size contained in |input_surface| is |aligned_input_size_|
+  // regardless of scaling.
  scoped_refptr<VASurface> reconstructed_surface =
      new VASurface(available_va_surface_ids_.back(), aligned_input_size_,
                    kVaSurfaceFormat, base::BindOnce(va_surface_release_cb_));

--- a/media/gpu/vaapi/vaapi_video_encode_accelerator.h
+++ b/media/gpu/vaapi/vaapi_video_encode_accelerator.h
@@ -206,6 +206,10 @@ class MEDIA_GPU_EXPORT VaapiVideoEncodeAccelerator
  std::unique_ptr<base::WeakPtrFactory<Client>> client_ptr_factory_;
  base::WeakPtr<Client> client_;

+  // VaapiWrapper for VPP (Video Pre Processing). This is used to scale down
+  // the picture that is sent to the VAAPI encoder.
+  scoped_refptr<VaapiWrapper> vpp_vaapi_wrapper_;
+
  // WeakPtr to post from the encoder thread back to the ChildThread, as it may
  // outlive this. Posting from the ChildThread using base::Unretained(this)
  // to the encoder thread is safe, because |this| always outlives the encoder

--- a/media/mojo/services/mojo_video_encode_accelerator_service.cc
+++ b/media/mojo/services/mojo_video_encode_accelerator_service.cc
@@ -89,7 +89,8 @@ void MojoVideoEncodeAcceleratorService::Encode(
  if (!encoder_)
    return;

-  if (frame->coded_size() != input_coded_size_) {
+  if (frame->coded_size() != input_coded_size_ &&
+      frame->storage_type() != media::VideoFrame::STORAGE_GPU_MEMORY_BUFFER) {
    DLOG(ERROR) << __func__ << " wrong input coded size, expected "
                << input_coded_size_.ToString() << ", got "
                << frame->coded_size().ToString();

--- a/third_party/blink/renderer/platform/DEPS
+++ b/third_party/blink/renderer/platform/DEPS
@@ -6,6 +6,7 @@ include_rules = [
    "+base/bind.h",
    "+base/bind_helpers.h",
    "+base/bit_cast.h",
+    "+base/command_line.h",
    "+base/compiler_specific.h",
    "+base/cpu.h",
    "+base/feature_list.h",

--- a/third_party/blink/renderer/platform/peerconnection/rtc_video_encoder.cc
+++ b/third_party/blink/renderer/platform/peerconnection/rtc_video_encoder.cc
@@ -8,6 +8,9 @@
 #include <memory>
 #include <vector>

+#include "base/bind.h"
+#include "base/bind_helpers.h"
+#include "base/command_line.h"
 #include "base/location.h"
 #include "base/logging.h"
 #include "base/memory/unsafe_shared_memory_region.h"
@@ -22,6 +25,7 @@
 #include "base/time/time.h"
 #include "media/base/bind_to_current_loop.h"
 #include "media/base/bitstream_buffer.h"
+#include "media/base/media_switches.h"
 #include "media/base/video_bitrate_allocation.h"
 #include "media/base/video_frame.h"
 #include "media/base/video_util.h"
@@ -211,6 +215,12 @@ class RTCVideoEncoder::Impl
  // Perform encoding on an input frame from the input queue.
  void EncodeOneFrame();

+  // Perform encoding on an input frame from the input queue using VEA native
+  // input mode.  The input frame must be backed with GpuMemoryBuffer buffers.
+  void EncodeOneFrameWithNativeInput();
+
+  void CreateBlackGpuMemoryBufferFrame(const gfx::Size& natural_size);
+
  // Notify that an input frame is finished for encoding.  |index| is the index
  // of the completed frame in |input_buffers_|.
  void EncodeFrameFinished(int index);
@@ -289,6 +299,13 @@ class RTCVideoEncoder::Impl
  // encoder.
  int output_buffers_free_count_;

+  // Whether to send the frames to VEA as native buffer. Native buffer allows
+  // VEA to pass the buffer to the encoder directly without further processing.
+  bool use_native_input_;
+
+  // A black GpuMemoryBuffer frame used when the video track is disabled.
+  scoped_refptr<media::VideoFrame> black_gmb_frame_;
+
  // webrtc::VideoEncoder encode complete callback.
  webrtc::EncodedImageCallback* encoded_image_callback_;

@@ -321,6 +338,7 @@ RTCVideoEncoder::Impl::Impl(media::GpuVideoAcceleratorFactories* gpu_factories,
      input_next_frame_(nullptr),
      input_next_frame_keyframe_(false),
      output_buffers_free_count_(0),
+      use_native_input_(false),
      encoded_image_callback_(nullptr),
      video_codec_type_(video_codec_type),
      video_content_type_(video_content_type),
@@ -351,9 +369,21 @@ void RTCVideoEncoder::Impl::CreateAndInitializeVEA(
    return;
  }
  input_visible_size_ = input_visible_size;
+  media::VideoPixelFormat pixel_format = media::PIXEL_FORMAT_I420;
+  auto storage_type =
+      media::VideoEncodeAccelerator::Config::StorageType::kShmem;
+  if (base::CommandLine::ForCurrentProcess()->HasSwitch(
+          switches::kVideoCaptureUseGpuMemoryBuffer) &&
+      video_content_type_ != webrtc::VideoContentType::SCREENSHARE) {
+    // Use import mode for camera when GpuMemoryBuffer-based video capture is
+    // enabled.
+    pixel_format = media::PIXEL_FORMAT_NV12;
+    storage_type = media::VideoEncodeAccelerator::Config::StorageType::kDmabuf;
+    use_native_input_ = true;
+  }
  const media::VideoEncodeAccelerator::Config config(
-      media::PIXEL_FORMAT_I420, input_visible_size_, profile, bitrate * 1000,
-      base::nullopt, base::nullopt, base::nullopt, base::nullopt,
+      pixel_format, input_visible_size_, profile, bitrate * 1000, base::nullopt,
+      base::nullopt, base::nullopt, storage_type,
      video_content_type_ == webrtc::VideoContentType::SCREENSHARE
          ? media::VideoEncodeAccelerator::Config::ContentType::kDisplay
          : media::VideoEncodeAccelerator::Config::ContentType::kCamera);
@@ -382,10 +412,10 @@ void RTCVideoEncoder::Impl::Enqueue(const webrtc::VideoFrame* input_frame,
  }

  // If there are no free input and output buffers, drop the frame to avoid a
-  // deadlock. If there is a free input buffer, EncodeOneFrame will run and
-  // unblock Encode(). If there are no free input buffers but there is a free
-  // output buffer, EncodeFrameFinished will be called later to unblock
-  // Encode().
+  // deadlock. If there is a free input buffer and |use_native_input_| is false,
+  // EncodeOneFrame will run and unblock Encode(). If there are no free input
+  // buffers but there is a free output buffer, EncodeFrameFinished will be
+  // called later to unblock Encode().
  //
  // The caller of Encode() holds a webrtc lock. The deadlock happens when:
  // (1) Encode() is waiting for the frame to be encoded in EncodeOneFrame().
@@ -399,7 +429,8 @@ void RTCVideoEncoder::Impl::Enqueue(const webrtc::VideoFrame* input_frame,
  // buffers. Returning an error in Encode() is not fatal and WebRTC will just
  // continue. If this is a key frame, WebRTC will request a key frame again.
  // Besides, webrtc will drop a frame if Encode() blocks too long.
-  if (input_buffers_free_.IsEmpty() && output_buffers_free_count_ == 0) {
+  if (!use_native_input_ && input_buffers_free_.IsEmpty() &&
+      output_buffers_free_count_ == 0) {
    DVLOG(2) << "Run out of input and output buffers. Drop the frame.";
    SignalAsyncWaiter(WEBRTC_VIDEO_CODEC_ERROR);
    return;
@@ -407,6 +438,13 @@ void RTCVideoEncoder::Impl::Enqueue(const webrtc::VideoFrame* input_frame,
  input_next_frame_ = input_frame;
  input_next_frame_keyframe_ = force_keyframe;

+  // If |use_native_input_| is true, then we always queue the frame to the
+  // encoder since no intermediate buffer is needed in RTCVideoEncoder.
+  if (use_native_input_) {
+    EncodeOneFrameWithNativeInput();
+    return;
+  }
+
  if (!input_buffers_free_.IsEmpty())
    EncodeOneFrame();
 }
@@ -757,9 +795,97 @@ void RTCVideoEncoder::Impl::EncodeOneFrame() {
  SignalAsyncWaiter(WEBRTC_VIDEO_CODEC_OK);
 }

+// Hands the pending |input_next_frame_| to the VEA without copying it into an
+// intermediate buffer.  The async waiter is always signalled before returning:
+// with WEBRTC_VIDEO_CODEC_OK once the frame has been passed to
+// |video_encoder_|, or with WEBRTC_VIDEO_CODEC_ERROR if there is no encoder or
+// no usable media::VideoFrame could be obtained.
+void RTCVideoEncoder::Impl::EncodeOneFrameWithNativeInput() {
+  DVLOG(3) << "Impl::EncodeOneFrameWithNativeInput()";
+  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
+  DCHECK(input_next_frame_);
+
+  // EncodeOneFrameWithNativeInput() may re-enter EncodeFrameFinished() if
+  // VEA::Encode() fails, we receive a VEA::NotifyError(), and the
+  // media::VideoFrame we pass to Encode() gets destroyed early.  Handle this by
+  // resetting our input_next_frame_* state before we hand off the VideoFrame to
+  // the VEA.
+  const webrtc::VideoFrame* next_frame = input_next_frame_;
+  const bool next_frame_keyframe = input_next_frame_keyframe_;
+  input_next_frame_ = nullptr;
+  input_next_frame_keyframe_ = false;
+
+  if (!video_encoder_) {
+    SignalAsyncWaiter(WEBRTC_VIDEO_CODEC_ERROR);
+    return;
+  }
+
+  scoped_refptr<media::VideoFrame> frame;
+  if (next_frame->video_frame_buffer()->type() !=
+      webrtc::VideoFrameBuffer::Type::kNative) {
+    // If we get a non-native frame it's because the video track is disabled and
+    // WebRTC VideoBroadcaster replaces the camera frame with a black YUV frame.
+    // Substitute a cached black GpuMemoryBuffer frame, created on first use.
+    if (!black_gmb_frame_) {
+      gfx::Size natural_size(next_frame->width(), next_frame->height());
+      CreateBlackGpuMemoryBufferFrame(natural_size);
+    }
+    // Creation can leave |black_gmb_frame_| null; report an error to the
+    // caller instead of dereferencing it below.
+    if (!black_gmb_frame_) {
+      DVLOG(2) << "Failed to create a black GpuMemoryBuffer frame.";
+      SignalAsyncWaiter(WEBRTC_VIDEO_CODEC_ERROR);
+      return;
+    }
+    frame = media::VideoFrame::WrapVideoFrame(
+        black_gmb_frame_, black_gmb_frame_->format(),
+        black_gmb_frame_->visible_rect(), black_gmb_frame_->natural_size());
+    if (frame) {
+      frame->set_timestamp(
+          base::TimeDelta::FromMilliseconds(next_frame->ntp_time_ms()));
+    }
+  } else {
+    frame = static_cast<blink::WebRtcVideoFrameAdapter*>(
+                next_frame->video_frame_buffer().get())
+                ->getMediaVideoFrame();
+  }
+  // The DCHECK below is compiled out in release builds, so guard against a
+  // missing frame explicitly before touching it.
+  if (!frame) {
+    DVLOG(2) << "No media::VideoFrame available for native input.";
+    SignalAsyncWaiter(WEBRTC_VIDEO_CODEC_ERROR);
+    return;
+  }
+  DCHECK_EQ(frame->storage_type(),
+            media::VideoFrame::STORAGE_GPU_MEMORY_BUFFER);
+
+  // No entry of |input_buffers_| is used in native input mode, so a
+  // placeholder index is passed to EncodeFrameFinished().
+  constexpr int kDummyIndex = -1;
+  frame->AddDestructionObserver(media::BindToCurrentLoop(base::BindOnce(
+      &RTCVideoEncoder::Impl::EncodeFrameFinished, this, kDummyIndex)));
+  if (!failed_timestamp_match_) {
+    DCHECK(std::find_if(pending_timestamps_.begin(), pending_timestamps_.end(),
+                        [&frame](const RTCTimestamps& entry) {
+                          return entry.media_timestamp_ == frame->timestamp();
+                        }) == pending_timestamps_.end());
+    pending_timestamps_.emplace_back(frame->timestamp(),
+                                     next_frame->timestamp(),
+                                     next_frame->render_time_ms());
+  }
+  video_encoder_->Encode(frame, next_frame_keyframe);
+  SignalAsyncWaiter(WEBRTC_VIDEO_CODEC_OK);
+}
+
+// Creates a YUV_420_BIPLANAR (NV12) GpuMemoryBuffer of |natural_size|, fills
+// it with black and stores the media::VideoFrame wrapping it in
+// |black_gmb_frame_|.  If the buffer cannot be created or mapped,
+// |black_gmb_frame_| is left null.
+void RTCVideoEncoder::Impl::CreateBlackGpuMemoryBufferFrame(
+    const gfx::Size& natural_size) {
+  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
+
+  auto gmb = gpu_factories_->CreateGpuMemoryBuffer(
+      natural_size, gfx::BufferFormat::YUV_420_BIPLANAR,
+      gfx::BufferUsage::SCANOUT_VEA_READ_CAMERA_AND_CPU_READ_WRITE);
+  // Do not touch plane memory unless the buffer exists and is mapped.
+  if (!gmb || !gmb->Map()) {
+    black_gmb_frame_ = nullptr;
+    return;
+  }
+
+  // Fills the NV12 frame with YUV black (0x00, 0x80, 0x80).  The interleaved
+  // UV plane is vertically subsampled by two, so it holds only half as many
+  // rows as the Y plane; clearing |height| rows would write past its end.
+  const auto gmb_size = gmb->GetSize();
+  memset(static_cast<uint8_t*>(gmb->memory(0)), 0x0,
+         gmb->stride(0) * gmb_size.height());
+  memset(static_cast<uint8_t*>(gmb->memory(1)), 0x80,
+         gmb->stride(1) * (gmb_size.height() / 2));
+  gmb->Unmap();
+
+  gpu::MailboxHolder empty_mailboxes[media::VideoFrame::kMaxPlanes];
+  black_gmb_frame_ = media::VideoFrame::WrapExternalGpuMemoryBuffer(
+      gfx::Rect(gmb_size), natural_size, std::move(gmb), empty_mailboxes,
+      base::NullCallback(), base::TimeDelta());
+}
+
 void RTCVideoEncoder::Impl::EncodeFrameFinished(int index) {
  DVLOG(3) << "Impl::EncodeFrameFinished(): index=" << index;
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
+
+  if (use_native_input_) {
+    if (input_next_frame_)
+      EncodeOneFrameWithNativeInput();
+    return;
+  }
+
  DCHECK_GE(index, 0);
  DCHECK_LT(index, static_cast<int>(input_buffers_.size()));
  input_buffers_free_.push_back(index);