Commit c413d3b7 authored by Hirokazu Honda, committed by Commit Bot

RtcVideoEncoder: Fill webrtc::CodecSpecificInfo alongside EncodedImage in the vp9 temporal layer encoding case

Vp9Metadata is returned in VEA::BitstreamBufferReady() during vp9
temporal layer encoding. RtcVideoEncoder propagates the metadata as
webrtc::CodecSpecificInfo. This information is needed by libwebrtc and
the webrtc server to manage the video call effectively.
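
For illustration only (a sketch, not part of this change): the 1SL3TL
pattern exercised by the new unittest yields per-frame fields as below.
The struct name is hypothetical; the values mirror
ReturnTemporalLayerFrameWithVp9Metadata in the test.

  #include <cstdint>
  #include <vector>

  // Hypothetical table of per-frame VP9 temporal-layer fields for the
  // three-layer pattern used in the test (frames #0..#4).
  //   TL2:      [#1]        /-[#3]
  //   TL1:     /_____[#2]__/
  //   TL0: [#0]------------------[#4]
  struct TemporalLayerFrame {
    uint8_t temporal_idx;          // temporal layer of this frame
    std::vector<uint8_t> p_diffs;  // picture distances to references
    bool temporal_up_switch;       // higher layers may switch up here
  };
  const TemporalLayerFrame kThreeLayerPattern[] = {
      {0, {}, true},       // #0: keyframe, no references
      {2, {1}, true},      // #1: references #0
      {1, {2}, true},      // #2: references #0
      {2, {1, 3}, false},  // #3: references #2 and #0
      {0, {4}, true},      // #4: references #0
  };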

Design doc: go/vaapi-vp9-TL

Bug: 1030199
Test: blink_platform_unittests
Test: appr.tc/?debug=loopback&vsc=vp9&vrc=vp9 with --force-fieldtrials=WebRTC-SupportVP9SVC/EnabledByFlag_1SL3TL/ --enable-features=VaapiVP9Encoder
Change-Id: I0e2b4bb3b98bc04e55c767a7bba9316b6bea4eea
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2153458
Commit-Queue: Hirokazu Honda <hiroh@chromium.org>
Reviewed-by: Dan Sanders <sandersd@chromium.org>
Cr-Commit-Position: refs/heads/master@{#784878}
parent 909ec166
@@ -463,6 +463,7 @@ class RTCVideoEncoder::Impl
   // Return an encoded output buffer to WebRTC.
   void ReturnEncodedImage(const webrtc::EncodedImage& image,
+                          const webrtc::CodecSpecificInfo& info,
                           int32_t bitstream_buffer_id);

   void SetStatus(int32_t status);
@@ -940,7 +941,66 @@ void RTCVideoEncoder::Impl::BitstreamBufferReady(
   image.content_type_ = video_content_type_;
   image._completeFrame = true;
-  ReturnEncodedImage(image, bitstream_buffer_id);
+  webrtc::CodecSpecificInfo info;
+  info.codecType = video_codec_type_;
+  switch (video_codec_type_) {
+    case webrtc::kVideoCodecVP8:
+      info.codecSpecific.VP8.keyIdx = -1;
+      break;
+    case webrtc::kVideoCodecVP9: {
+      bool key_frame =
+          image._frameType == webrtc::VideoFrameType::kVideoFrameKey;
+      webrtc::CodecSpecificInfoVP9& vp9 = info.codecSpecific.VP9;
+      if (metadata.vp9) {
+        // Temporal layer stream.
+        vp9.first_frame_in_picture = true;
+        vp9.inter_pic_predicted = metadata.vp9->has_reference;
+        vp9.flexible_mode = true;
+        vp9.non_ref_for_inter_layer_pred = false;
+        vp9.temporal_idx = metadata.vp9->temporal_idx;
+        vp9.temporal_up_switch = metadata.vp9->temporal_up_switch;
+        vp9.inter_layer_predicted = false;
+        vp9.gof_idx = 0;
+        vp9.num_ref_pics = metadata.vp9->p_diffs.size();
+        for (size_t i = 0; i < metadata.vp9->p_diffs.size(); ++i)
+          vp9.p_diff[i] = metadata.vp9->p_diffs[i];
+        vp9.end_of_picture = true;
+        vp9.ss_data_available = key_frame;
+        vp9.first_active_layer = 0u;
+        vp9.spatial_layer_resolution_present = true;
+        vp9.num_spatial_layers = 1u;
+        vp9.width[0] = image._encodedWidth;
+        vp9.height[0] = image._encodedHeight;
+      } else {
+        // Simple stream, neither temporal nor spatial layer stream.
+        vp9.flexible_mode = false;
+        vp9.temporal_idx = webrtc::kNoTemporalIdx;
+        vp9.temporal_up_switch = true;
+        vp9.inter_layer_predicted = false;
+        vp9.gof_idx = 0;
+        vp9.num_spatial_layers = 1;
+        vp9.first_frame_in_picture = true;
+        vp9.end_of_picture = true;
+        vp9.spatial_layer_resolution_present = false;
+        vp9.inter_pic_predicted = !key_frame;
+        vp9.ss_data_available = key_frame;
+        if (key_frame) {
+          vp9.spatial_layer_resolution_present = true;
+          vp9.width[0] = image._encodedWidth;
+          vp9.height[0] = image._encodedHeight;
+          vp9.gof.num_frames_in_gof = 1;
+          vp9.gof.temporal_idx[0] = 0;
+          vp9.gof.temporal_up_switch[0] = false;
+          vp9.gof.num_ref_pics[0] = 1;
+          vp9.gof.pid_diff[0][0] = 1;
+        }
+      }
+    } break;
+    default:
+      break;
+  }
+  ReturnEncodedImage(image, info, bitstream_buffer_id);
 }

 void RTCVideoEncoder::Impl::NotifyError(
@@ -1222,6 +1282,7 @@ void RTCVideoEncoder::Impl::RegisterEncodeCompleteCallback(

 void RTCVideoEncoder::Impl::ReturnEncodedImage(
     const webrtc::EncodedImage& image,
+    const webrtc::CodecSpecificInfo& info,
     int32_t bitstream_buffer_id) {
   DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
   DVLOG(3) << __func__ << " bitstream_buffer_id=" << bitstream_buffer_id;
@@ -1252,35 +1313,6 @@ void RTCVideoEncoder::Impl::ReturnEncodedImage(
     return;
   }

-  webrtc::CodecSpecificInfo info;
-  info.codecType = video_codec_type_;
-  if (video_codec_type_ == webrtc::kVideoCodecVP8) {
-    info.codecSpecific.VP8.keyIdx = -1;
-  } else if (video_codec_type_ == webrtc::kVideoCodecVP9) {
-    bool key_frame = image._frameType == webrtc::VideoFrameType::kVideoFrameKey;
-    info.codecSpecific.VP9.inter_pic_predicted = key_frame ? false : true;
-    info.codecSpecific.VP9.flexible_mode = false;
-    info.codecSpecific.VP9.ss_data_available = key_frame ? true : false;
-    info.codecSpecific.VP9.temporal_idx = webrtc::kNoTemporalIdx;
-    info.codecSpecific.VP9.temporal_up_switch = true;
-    info.codecSpecific.VP9.inter_layer_predicted = false;
-    info.codecSpecific.VP9.gof_idx = 0;
-    info.codecSpecific.VP9.num_spatial_layers = 1;
-    info.codecSpecific.VP9.first_frame_in_picture = true;
-    info.codecSpecific.VP9.end_of_picture = true;
-    info.codecSpecific.VP9.spatial_layer_resolution_present = false;
-    if (info.codecSpecific.VP9.ss_data_available) {
-      info.codecSpecific.VP9.spatial_layer_resolution_present = true;
-      info.codecSpecific.VP9.width[0] = image._encodedWidth;
-      info.codecSpecific.VP9.height[0] = image._encodedHeight;
-      info.codecSpecific.VP9.gof.num_frames_in_gof = 1;
-      info.codecSpecific.VP9.gof.temporal_idx[0] = 0;
-      info.codecSpecific.VP9.gof.temporal_up_switch[0] = false;
-      info.codecSpecific.VP9.gof.num_ref_pics[0] = 1;
-      info.codecSpecific.VP9.gof.pid_diff[0][0] = 1;
-    }
-  }
-
   const auto result =
       encoded_image_callback_->OnEncodedImage(image, &info, &header);
   if (result.error != webrtc::EncodedImageCallback::Result::OK) {
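
Note: media::Vp9Metadata consumed above is approximated below from the
fields this CL reads (has_reference, temporal_up_switch, temporal_idx,
p_diffs); this is a readability sketch, not the authoritative
declaration in media/.

  #include <cstdint>
  #include <vector>

  // Approximate shape of media::Vp9Metadata, inferred from usage in
  // this CL rather than copied from the media/ header.
  struct Vp9Metadata {
    bool has_reference = false;       // frame references earlier frames
    bool temporal_up_switch = false;  // higher layers may switch up here
    uint8_t temporal_idx = 0;         // temporal layer id of this frame
    std::vector<uint8_t> p_diffs;     // picture distances to references
  };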
@@ -161,6 +161,36 @@ class RTCVideoEncoderTest
     return codec;
   }

+  webrtc::VideoCodec GetDefaultTemporalLayerCodec() {
+    const webrtc::VideoCodecType codec_type = webrtc::kVideoCodecVP9;
+    webrtc::VideoCodec codec{};
+    codec.codecType = codec_type;
+    codec.width = kInputFrameWidth;
+    codec.height = kInputFrameHeight;
+    codec.startBitrate = kStartBitrate;
+    codec.maxBitrate = codec.startBitrate * 2;
+    codec.minBitrate = codec.startBitrate / 2;
+    codec.maxFramerate = 24;
+    codec.active = true;
+    codec.qpMax = 30;
+    codec.numberOfSimulcastStreams = 1;
+    codec.mode = webrtc::VideoCodecMode::kRealtimeVideo;
+    webrtc::VideoCodecVP9& vp9 = *codec.VP9();
+    vp9.numberOfTemporalLayers = 3;
+    vp9.numberOfSpatialLayers = 1;
+    webrtc::SpatialLayer& sl = codec.spatialLayers[0];
+    sl.width = kInputFrameWidth;
+    sl.height = kInputFrameHeight;
+    sl.maxFramerate = 24;
+    sl.numberOfTemporalLayers = vp9.numberOfTemporalLayers;
+    sl.targetBitrate = kStartBitrate;
+    sl.maxBitrate = sl.targetBitrate;
+    sl.minBitrate = sl.targetBitrate;
+    sl.qpMax = 30;
+    sl.active = true;
+    return codec;
+  }
+
   void FillFrameBuffer(rtc::scoped_refptr<webrtc::I420Buffer> frame) {
     CHECK(libyuv::I420Rect(frame->MutableDataY(), frame->StrideY(),
                            frame->MutableDataU(), frame->StrideU(),
@@ -189,6 +219,45 @@ class RTCVideoEncoderTest
         media::BitstreamBufferMetadata(0, force_keyframe, frame->timestamp()));
   }

+  void ReturnTemporalLayerFrameWithVp9Metadata(
+      scoped_refptr<media::VideoFrame> frame,
+      bool force_keyframe) {
+    int32_t bitstream_buffer_id = frame->timestamp().InMicroseconds();
+    CHECK(0 <= bitstream_buffer_id && bitstream_buffer_id <= 4);
+    media::BitstreamBufferMetadata metadata(100u /* payload_size_bytes */,
+                                            force_keyframe, frame->timestamp());
+    // Assume the number of TLs is three. TL structure is below.
+    // TL2:      [#1]        /-[#3]
+    // TL1:     /_____[#2]__/
+    // TL0: [#0]------------------[#4]
+    media::Vp9Metadata vp9;
+    vp9.has_reference = bitstream_buffer_id != 0 && !force_keyframe;
+    vp9.temporal_up_switch = bitstream_buffer_id != 3;
+    switch (bitstream_buffer_id) {
+      case 0:
+        vp9.temporal_idx = 0;
+        break;
+      case 1:
+        vp9.temporal_idx = 2;
+        vp9.p_diffs = {1};
+        break;
+      case 2:
+        vp9.temporal_idx = 1;
+        vp9.p_diffs = {2};
+        break;
+      case 3:
+        vp9.temporal_idx = 2;
+        vp9.p_diffs = {1, 3};
+        break;
+      case 4:
+        vp9.temporal_idx = 0;
+        vp9.p_diffs = {4};
+        break;
+    }
+    metadata.vp9 = vp9;
+    client_->BitstreamBufferReady(bitstream_buffer_id, metadata);
+  }
+
   void VerifyTimestamp(uint32_t rtp_timestamp,
                        int64_t capture_time_ms,
                        const webrtc::EncodedImage& encoded_image,
@@ -236,34 +305,10 @@ INSTANTIATE_TEST_SUITE_P(CodecProfiles,
                          webrtc::kVideoCodecH264));

 TEST_F(RTCVideoEncoderTest, CreateAndInitSucceedsForTemporalLayer) {
-  const webrtc::VideoCodecType codec_type = webrtc::kVideoCodecVP9;
-  CreateEncoder(codec_type);
-  webrtc::VideoCodec codec{};
-  codec.codecType = codec_type;
-  codec.width = kInputFrameWidth;
-  codec.height = kInputFrameHeight;
-  codec.startBitrate = kStartBitrate;
-  codec.maxBitrate = codec.startBitrate * 2;
-  codec.minBitrate = codec.startBitrate / 2;
-  codec.maxFramerate = 24;
-  codec.active = true;
-  codec.qpMax = 30;
-  codec.numberOfSimulcastStreams = 1;
-  codec.mode = webrtc::VideoCodecMode::kRealtimeVideo;
-  webrtc::VideoCodecVP9& vp9 = *codec.VP9();
-  vp9.numberOfTemporalLayers = 3;
-  vp9.numberOfSpatialLayers = 1;
-  webrtc::SpatialLayer& sl = codec.spatialLayers[0];
-  sl.width = kInputFrameWidth;
-  sl.height = kInputFrameHeight;
-  sl.maxFramerate = 24;
-  sl.numberOfTemporalLayers = vp9.numberOfTemporalLayers;
-  sl.targetBitrate = kStartBitrate;
-  sl.maxBitrate = sl.targetBitrate;
-  sl.minBitrate = sl.targetBitrate;
-  sl.qpMax = 30;
-  sl.active = true;
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, rtc_encoder_->InitEncode(&codec, 1, 12345));
+  webrtc::VideoCodec tl_codec = GetDefaultTemporalLayerCodec();
+  CreateEncoder(tl_codec.codecType);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            rtc_encoder_->InitEncode(&tl_codec, 1, 12345));
 }

 // Checks that WEBRTC_VIDEO_CODEC_FALLBACK_SOFTWARE is returned when there is
@@ -379,4 +424,30 @@ TEST_F(RTCVideoEncoderTest, PreserveTimestamps) {
                     rtc_encoder_->Encode(rtc_frame, &frame_types));
 }

+TEST_F(RTCVideoEncoderTest, EncodeTemporalLayer) {
+  webrtc::VideoCodec tl_codec = GetDefaultTemporalLayerCodec();
+  CreateEncoder(tl_codec.codecType);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            rtc_encoder_->InitEncode(&tl_codec, 1, 12345));
+  size_t kNumEncodeFrames = 5u;
+  EXPECT_CALL(*mock_vea_, Encode(_, _))
+      .Times(kNumEncodeFrames)
+      .WillRepeatedly(Invoke(
+          this, &RTCVideoEncoderTest::ReturnTemporalLayerFrameWithVp9Metadata));
+  for (size_t i = 0; i < kNumEncodeFrames; i++) {
+    const rtc::scoped_refptr<webrtc::I420Buffer> buffer =
+        webrtc::I420Buffer::Create(kInputFrameWidth, kInputFrameHeight);
+    FillFrameBuffer(buffer);
+    std::vector<webrtc::VideoFrameType> frame_types;
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              rtc_encoder_->Encode(webrtc::VideoFrame::Builder()
+                                       .set_video_frame_buffer(buffer)
+                                       .set_timestamp_rtp(0)
+                                       .set_timestamp_us(i)
+                                       .set_rotation(webrtc::kVideoRotation_0)
+                                       .build(),
+                                   &frame_types));
+  }
+}
+
 }  // namespace blink