Commit 634e7ec2 authored by servolk's avatar servolk Committed by Commit bot

Allow MP4 parser to handle multiple audio and video tracks

Added a new multi-track (2 audio + 2 video tracks) .mp4 file for tests
and updated MP4StreamParser to handle multiple tracks properly.

BUG=249427, 249428

Review-Url: https://codereview.chromium.org/2254733006
Cr-Commit-Position: refs/heads/master@{#414872}
parent 2063c5ec
......@@ -65,6 +65,9 @@ bool AVC::ConvertFrameToAnnexB(int length_size,
std::vector<uint8_t>* buffer,
std::vector<SubsampleEntry>* subsamples) {
RCHECK(length_size == 1 || length_size == 2 || length_size == 4);
DVLOG(5) << __FUNCTION__ << " length_size=" << length_size
<< " buffer->size()=" << buffer->size()
<< " subsamples=" << (subsamples ? subsamples->size() : 0);
if (length_size == 4)
return ConvertAVCToAnnexBInPlaceForLengthSize4(buffer);
......
......@@ -41,12 +41,8 @@ MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types,
highest_end_offset_(0),
has_audio_(false),
has_video_(false),
audio_track_id_(0),
video_track_id_(0),
audio_object_types_(audio_object_types),
has_sbr_(has_sbr),
is_audio_track_encrypted_(false),
is_video_track_encrypted_(false),
num_top_level_box_skipped_(0) {
}
......@@ -186,6 +182,9 @@ bool MP4StreamParser::ParseMoov(BoxReader* reader) {
moov_.reset(new Movie);
RCHECK(moov_->Parse(reader));
runs_.reset();
audio_track_ids_.clear();
video_track_ids_.clear();
is_track_encrypted_.clear();
has_audio_ = false;
has_video_ = false;
......@@ -218,8 +217,6 @@ bool MP4StreamParser::ParseMoov(BoxReader* reader) {
if (track->media.handler.type == kAudio) {
detected_audio_track_count++;
if (audio_config.IsValidConfig())
continue; // Skip other audio tracks once we found a supported one.
RCHECK(!samp_descr.audio_entries.empty());
......@@ -308,20 +305,30 @@ bool MP4StreamParser::ParseMoov(BoxReader* reader) {
return false;
}
is_audio_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted;
DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_;
uint32_t audio_track_id = track->header.track_id;
if (audio_track_ids_.find(audio_track_id) != audio_track_ids_.end()) {
MEDIA_LOG(ERROR, media_log_)
<< "Audio track with track_id=" << audio_track_id
<< " already present.";
return false;
}
bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;
is_track_encrypted_[audio_track_id] = is_track_encrypted;
audio_config.Initialize(
codec, sample_format, channel_layout, sample_per_second, extra_data,
is_audio_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted(),
is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted(),
base::TimeDelta(), 0);
DVLOG(1) << "audio_track_id=" << audio_track_id
<< " config=" << audio_config.AsHumanReadableString();
if (!audio_config.IsValidConfig()) {
MEDIA_LOG(ERROR, media_log_) << "Invalid audio decoder config: "
<< audio_config.AsHumanReadableString();
return false;
}
has_audio_ = true;
audio_track_id_ = track->header.track_id;
media_tracks->AddAudioTrack(audio_config, audio_track_id_, "main",
audio_track_ids_.insert(audio_track_id);
const char* track_kind = (audio_track_ids_.size() == 1 ? "main" : "");
media_tracks->AddAudioTrack(audio_config, audio_track_id, track_kind,
track->media.handler.name,
track->media.header.language());
continue;
......@@ -329,8 +336,6 @@ bool MP4StreamParser::ParseMoov(BoxReader* reader) {
if (track->media.handler.type == kVideo) {
detected_video_track_count++;
if (video_config.IsValidConfig())
continue; // Skip other video tracks once we found a supported one.
RCHECK(!samp_descr.video_entries.empty());
if (desc_idx >= samp_descr.video_entries.size())
......@@ -361,23 +366,33 @@ bool MP4StreamParser::ParseMoov(BoxReader* reader) {
gfx::Size(track->header.width, track->header.height);
}
is_video_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted;
DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_;
uint32_t video_track_id = track->header.track_id;
if (video_track_ids_.find(video_track_id) != video_track_ids_.end()) {
MEDIA_LOG(ERROR, media_log_)
<< "Video track with track_id=" << video_track_id
<< " already present.";
return false;
}
bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;
is_track_encrypted_[video_track_id] = is_track_encrypted;
video_config.Initialize(
entry.video_codec, entry.video_codec_profile, PIXEL_FORMAT_YV12,
COLOR_SPACE_HD_REC709, coded_size, visible_rect, natural_size,
// No decoder-specific buffer needed for AVC;
// SPS/PPS are embedded in the video stream
EmptyExtraData(),
is_video_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted());
is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted());
DVLOG(1) << "video_track_id=" << video_track_id
<< " config=" << video_config.AsHumanReadableString();
if (!video_config.IsValidConfig()) {
MEDIA_LOG(ERROR, media_log_) << "Invalid video decoder config: "
<< video_config.AsHumanReadableString();
return false;
}
has_video_ = true;
video_track_id_ = track->header.track_id;
media_tracks->AddVideoTrack(video_config, video_track_id_, "main",
video_track_ids_.insert(video_track_id);
const char* track_kind = (video_track_ids_.size() == 1 ? "main" : "");
media_tracks->AddVideoTrack(video_config, video_track_id, track_kind,
track->media.handler.name,
track->media.header.language());
continue;
......@@ -514,8 +529,10 @@ bool MP4StreamParser::EnqueueSample(BufferQueueMap* buffers, bool* err) {
queue_.Peek(&buf, &buf_size);
if (!buf_size) return false;
bool audio = has_audio_ && audio_track_id_ == runs_->track_id();
bool video = has_video_ && video_track_id_ == runs_->track_id();
bool audio =
audio_track_ids_.find(runs_->track_id()) != audio_track_ids_.end();
bool video =
video_track_ids_.find(runs_->track_id()) != video_track_ids_.end();
// Skip this entire track if it's not one we're interested in
if (!audio && !video) {
......@@ -585,8 +602,7 @@ bool MP4StreamParser::EnqueueSample(BufferQueueMap* buffers, bool* err) {
subsamples));
}
// else, use the existing config.
} else if ((audio && is_audio_track_encrypted_) ||
(video && is_video_track_encrypted_)) {
} else if (is_track_encrypted_[runs_->track_id()]) {
// The media pipeline requires a DecryptConfig with an empty |iv|.
// TODO(ddorwin): Refactor so we do not need a fake key ID ("1");
decrypt_config.reset(
......@@ -596,9 +612,6 @@ bool MP4StreamParser::EnqueueSample(BufferQueueMap* buffers, bool* err) {
StreamParserBuffer::Type buffer_type = audio ? DemuxerStream::AUDIO :
DemuxerStream::VIDEO;
// TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
// type and allow multiple tracks for same media type, if applicable. See
// https://crbug.com/341581.
scoped_refptr<StreamParserBuffer> stream_buf = StreamParserBuffer::CopyFrom(
&frame_buf[0], frame_buf.size(), runs_->is_keyframe(), buffer_type,
runs_->track_id());
......@@ -610,7 +623,8 @@ bool MP4StreamParser::EnqueueSample(BufferQueueMap* buffers, bool* err) {
stream_buf->set_timestamp(runs_->cts());
stream_buf->SetDecodeTimestamp(runs_->dts());
DVLOG(3) << "Pushing frame: aud=" << audio
DVLOG(3) << "Emit " << (audio ? "audio" : "video") << " frame: "
<< " track_id=" << runs_->track_id()
<< ", key=" << runs_->is_keyframe()
<< ", dur=" << runs_->duration().InMilliseconds()
<< ", dts=" << runs_->dts().InMilliseconds()
......
......@@ -118,13 +118,12 @@ class MEDIA_EXPORT MP4StreamParser : public StreamParser {
bool has_audio_;
bool has_video_;
uint32_t audio_track_id_;
uint32_t video_track_id_;
std::set<uint32_t> audio_track_ids_;
std::set<uint32_t> video_track_ids_;
// The object types allowed for audio tracks.
std::set<int> audio_object_types_;
bool has_sbr_;
bool is_audio_track_encrypted_;
bool is_video_track_encrypted_;
std::map<uint32_t, bool> is_track_encrypted_;
// Tracks the number of MEDIA_LOGs for skipping top level boxes. Useful to
// prevent log spam.
......
......@@ -125,14 +125,14 @@ class MP4StreamParserTest : public testing::Test {
if (track->type() == MediaTrack::Audio) {
audio_track_id_ = track_id;
audio_decoder_config_ = tracks->getAudioConfig(track_id);
DVLOG(1) << "Audio track " << track_id << " config="
DVLOG(1) << "track_id=" << track_id << " audio config="
<< (audio_decoder_config_.IsValidConfig()
? audio_decoder_config_.AsHumanReadableString()
: "INVALID");
} else if (track->type() == MediaTrack::Video) {
video_track_id_ = track_id;
video_decoder_config_ = tracks->getVideoConfig(track_id);
DVLOG(1) << "Video track " << track_id << " config="
DVLOG(1) << "track_id=" << track_id << " video config="
<< (video_decoder_config_.IsValidConfig()
? video_decoder_config_.AsHumanReadableString()
: "INVALID");
......@@ -143,48 +143,33 @@ class MP4StreamParserTest : public testing::Test {
}
bool NewBuffersF(const StreamParser::BufferQueueMap& buffer_queue_map) {
// Ensure that track ids are properly assigned on all emitted buffers.
DecodeTimestamp lowest_end_dts = kNoDecodeTimestamp();
for (const auto& it : buffer_queue_map) {
DVLOG(3) << "Buffers for track_id=" << it.first;
DCHECK(!it.second.empty());
if (lowest_end_dts == kNoDecodeTimestamp() ||
lowest_end_dts > it.second.back()->GetDecodeTimestamp())
lowest_end_dts = it.second.back()->GetDecodeTimestamp();
for (const auto& buf : it.second) {
DVLOG(3) << " track_id=" << buf->track_id()
<< ", size=" << buf->data_size()
<< ", pts=" << buf->timestamp().InSecondsF()
<< ", dts=" << buf->GetDecodeTimestamp().InSecondsF()
<< ", dur=" << buf->duration().InSecondsF();
// Ensure that track ids are properly assigned on all emitted buffers.
EXPECT_EQ(it.first, buf->track_id());
}
}
const StreamParser::BufferQueue empty_buffers;
const auto& itr_audio = buffer_queue_map.find(audio_track_id_);
const StreamParser::BufferQueue& audio_buffers =
(itr_audio == buffer_queue_map.end()) ? empty_buffers
: itr_audio->second;
const auto& itr_video = buffer_queue_map.find(video_track_id_);
const StreamParser::BufferQueue& video_buffers =
(itr_video == buffer_queue_map.end()) ? empty_buffers
: itr_video->second;
// Find the second highest timestamp so that we know what the
// timestamps on the next set of buffers must be >= than.
DecodeTimestamp audio = !audio_buffers.empty() ?
audio_buffers.back()->GetDecodeTimestamp() : kNoDecodeTimestamp();
DecodeTimestamp video = !video_buffers.empty() ?
video_buffers.back()->GetDecodeTimestamp() : kNoDecodeTimestamp();
DecodeTimestamp second_highest_timestamp =
(audio == kNoDecodeTimestamp() ||
(video != kNoDecodeTimestamp() && audio > video)) ? video : audio;
EXPECT_NE(second_highest_timestamp, kNoDecodeTimestamp());
if (lower_bound_ != kNoDecodeTimestamp() &&
second_highest_timestamp < lower_bound_) {
EXPECT_NE(lowest_end_dts, kNoDecodeTimestamp());
if (lower_bound_ != kNoDecodeTimestamp() && lowest_end_dts < lower_bound_) {
return false;
}
lower_bound_ = second_highest_timestamp;
lower_bound_ = lowest_end_dts;
return true;
}
......@@ -544,5 +529,47 @@ TEST_F(MP4StreamParserTest, TextTrackDetection) {
EXPECT_TRUE(AppendDataInPieces(buffer->data(), buffer->data_size(), 512));
}
// Parses an MP4 file that is expected to expose two video and two audio
// tracks, and checks that each track is reported with the expected type,
// bytestream track id, kind, label and language. The first track of each
// media type is expected to have kind "main"; the second one is expected to
// have an empty kind.
TEST_F(MP4StreamParserTest, MultiTrackFile) {
  auto params = GetDefaultInitParametersExpectations();
  params.duration = base::TimeDelta::FromMilliseconds(4248);
  params.liveness = DemuxerStream::LIVENESS_RECORDED;
  params.detected_audio_track_count = 2;
  params.detected_video_track_count = 2;
  InitializeParserWithInitParametersExpectations(params);

  // Each codec string is expected to be logged once per track of that type.
  EXPECT_MEDIA_LOG(VideoCodecLog("avc1.64000D")).Times(2);
  EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.40.2")).Times(2);
  ParseMP4File("bbb-320x240-2video-2audio.mp4", 4096);

  // The track count is a hard precondition: the checks below index tracks
  // 0..3 directly, so abort the test here instead of reading out of bounds
  // when fewer tracks than expected were reported.
  ASSERT_EQ(media_tracks_->tracks().size(), 4u);

  const MediaTrack& video_track1 = *(media_tracks_->tracks()[0]);
  EXPECT_EQ(video_track1.type(), MediaTrack::Video);
  EXPECT_EQ(video_track1.bytestream_track_id(), 1);
  EXPECT_EQ(video_track1.kind(), "main");
  EXPECT_EQ(video_track1.label(), "VideoHandler");
  EXPECT_EQ(video_track1.language(), "und");

  const MediaTrack& audio_track1 = *(media_tracks_->tracks()[1]);
  EXPECT_EQ(audio_track1.type(), MediaTrack::Audio);
  EXPECT_EQ(audio_track1.bytestream_track_id(), 2);
  EXPECT_EQ(audio_track1.kind(), "main");
  EXPECT_EQ(audio_track1.label(), "SoundHandler");
  EXPECT_EQ(audio_track1.language(), "und");

  const MediaTrack& video_track2 = *(media_tracks_->tracks()[2]);
  EXPECT_EQ(video_track2.type(), MediaTrack::Video);
  EXPECT_EQ(video_track2.bytestream_track_id(), 3);
  EXPECT_EQ(video_track2.kind(), "");
  EXPECT_EQ(video_track2.label(), "VideoHandler");
  EXPECT_EQ(video_track2.language(), "und");

  const MediaTrack& audio_track2 = *(media_tracks_->tracks()[3]);
  EXPECT_EQ(audio_track2.type(), MediaTrack::Audio);
  EXPECT_EQ(audio_track2.bytestream_track_id(), 4);
  EXPECT_EQ(audio_track2.kind(), "");
  EXPECT_EQ(audio_track2.label(), "SoundHandler");
  EXPECT_EQ(audio_track2.language(), "und");
}
} // namespace mp4
} // namespace media
......@@ -235,3 +235,16 @@ media/test/data/bear-1280x720-aac_he.ts
media/test/data/bear-320x240-v_frag-hevc.mp4
HEVC video stream in fragmented MP4 container, generated with
ffmpeg -i bear-320x240.webm -c:v libx265 -an -movflags faststart+frag_keyframe bear-320x240-v_frag-hevc.mp4
// Multi-track MP4 file
// (c) copyright 2008, Blender Foundation / www.bigbuckbunny.org
media/test/data/bbb-320x240-2video-2audio.mp4
Generated using the following commands:
// Download the source file with 1 video and 1 audio stream.
wget http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_30fps_normal.mp4
// Generate a video scaled down to 320x240 with 2-channel AAC LC audio from the source file.
ffmpeg -i bbb_sunflower_1080p_30fps_normal.mp4 -c:v libx264 -crf 36 -vf scale=320:240 -c:a libfdk_aac -ac 2 -t 24 bbb1.mp4
// Generate a file with the original video scaled down to 320x240 and flipped upside down, and a sine wave in place of the original audio.
ffmpeg -i bbb_sunflower_1080p_30fps_normal.mp4 -f lavfi -i "sine=frequency=500:sample_rate=48000" -map 0:v -map 1:a -c:v libx264 -crf 36 -vf scale=320:240,vflip -c:a libfdk_aac -ac 2 -t 24 bbb2.mp4
// Combine the two files generated above into a single fragmented .mp4 file with 2 video and 2 audio tracks.
ffmpeg -i bbb1.mp4 -i bbb2.mp4 -map 0:0 -map 0:1 -map 1:0 -map 1:1 -c:v copy -c:a copy -movflags frag_keyframe+omit_tfhd_offset+separate_moof bbb-320x240-2video-2audio.mp4
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment