Commit 4614245a, authored by chcunningham, committed by Commit bot

Parsing of encoded duration for unencrypted opus streams.

BUG=396634

Review URL: https://codereview.chromium.org/883403002

Cr-Commit-Position: refs/heads/master@{#315395}
parent 089a7dad
...@@ -554,6 +554,8 @@ test("media_unittests") { ...@@ -554,6 +554,8 @@ test("media_unittests") {
"formats/common/offset_byte_queue_unittest.cc", "formats/common/offset_byte_queue_unittest.cc",
"formats/webm/cluster_builder.cc", "formats/webm/cluster_builder.cc",
"formats/webm/cluster_builder.h", "formats/webm/cluster_builder.h",
"formats/webm/opus_packet_builder.cc",
"formats/webm/opus_packet_builder.h",
"formats/webm/tracks_builder.cc", "formats/webm/tracks_builder.cc",
"formats/webm/tracks_builder.h", "formats/webm/tracks_builder.h",
"formats/webm/webm_cluster_parser_unittest.cc", "formats/webm/webm_cluster_parser_unittest.cc",
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <sstream> #include <sstream>
#include <string> #include <string>
#include "base/logging.h"
#include "base/memory/ref_counted.h" #include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h" #include "base/memory/scoped_ptr.h"
#include "media/base/media_export.h" #include "media/base/media_export.h"
...@@ -36,6 +37,11 @@ class LogHelper { ...@@ -36,6 +37,11 @@ class LogHelper {
#define MEDIA_LOG(log_cb) LogHelper(log_cb).stream() #define MEDIA_LOG(log_cb) LogHelper(log_cb).stream()
// Logs only while count < max. Increments count for each log. Use LAZY_STREAM
// to avoid wasteful evaluation of subsequent stream arguments.
//
// |count| is expanded twice (once in the comparison and once in the
// post-increment), so it must be a modifiable lvalue whose evaluation has no
// side effects. The "|| true" keeps the condition true even when the
// post-increment yields 0, so the very first message is not suppressed.
#define LIMITED_MEDIA_LOG(log_cb, count, max) \
LAZY_STREAM(MEDIA_LOG(log_cb), (count) < (max) && ((count)++ || true))
class MEDIA_EXPORT MediaLog : public base::RefCountedThreadSafe<MediaLog> { class MEDIA_EXPORT MediaLog : public base::RefCountedThreadSafe<MediaLog> {
public: public:
// Convert various enums to strings. // Convert various enums to strings.
......
// Copyright (c) 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/logging.h"
#include "media/formats/webm/opus_packet_builder.h"
#include "media/formats/webm/webm_cluster_parser.h"
namespace media {
// Builds the leading bytes of an Opus packet for use as test input.
//
// |config| is the 5-bit configuration number stored in the upper bits of the
// TOC byte and must be < kNumPossibleOpusConfigs.
// |frame_count| is the number of frames the packet claims to carry and must
// lie in [kMinOpusPacketFrameCount, kMaxOpusPacketFrameCount].
// |is_VBR| selects the variable-bitrate framing variant; it only changes the
// emitted bytes when |frame_count| >= 2.
//
// Only the TOC byte and, for "code 3" packets, the frame count byte follow
// the packet layout. One dummy padding byte is appended after them.
OpusPacket::OpusPacket(uint8_t config, uint8_t frame_count, bool is_VBR) {
  // |config| is unsigned, so only its upper bound needs to be checked.
  DCHECK_LT(config, kNumPossibleOpusConfigs);
  DCHECK_GE(frame_count, kMinOpusPacketFrameCount);
  DCHECK_LE(frame_count, kMaxOpusPacketFrameCount);

  // The table holds microseconds per frame. Divide in double precision since
  // |duration_ms_| is a double.
  duration_ms_ =
      frame_count * WebMClusterParser::kOpusFrameDurationsMu[config] / 1000.0;

  // Both values are initialized up front so that no path leaves them
  // indeterminate. |frame_count_byte| is only emitted for code 3 packets.
  uint8_t frame_count_code = 0;
  uint8_t frame_count_byte = 0;

  if (frame_count == 1) {
    frame_count_code = 0;
  } else if (frame_count == 2) {
    frame_count_code = is_VBR ? 2 : 1;
  } else {
    frame_count_code = 3;
    // Bit 7 carries the VBR flag; the low bits carry the frame count.
    frame_count_byte =
        static_cast<uint8_t>((is_VBR ? 1 << 7 : 0) | frame_count);
  }

  // All opus packets must have TOC byte.
  const uint8_t opus_toc_byte =
      static_cast<uint8_t>((config << 3) | frame_count_code);
  data_.push_back(opus_toc_byte);

  // For code 3 packets, the number of frames is signaled in the "frame
  // count byte".
  if (frame_count_code == 3) {
    data_.push_back(frame_count_byte);
  }

  // Packet will only conform to layout specification for the TOC byte
  // and optional frame count bytes appended above. This last byte
  // is purely dummy padding where frame size data or encoded data might
  // otherwise start.
  data_.push_back(static_cast<uint8_t>(0));
}
// Nothing to release explicitly; members clean up after themselves.
OpusPacket::~OpusPacket() = default;
// Returns a pointer to the first byte of the packet. The constructor always
// appends at least the TOC byte and one padding byte, so the buffer is never
// empty.
const uint8_t* OpusPacket::data() const {
  return data_.data();
}
// Returns the number of bytes available through data().
int OpusPacket::size() const {
  const int byte_count = static_cast<int>(data_.size());
  return byte_count;
}
double OpusPacket::duration_ms() const {
return duration_ms_;
}
// Creates a packet for every combination of frame count and config number.
// Packets are ordered by frame count, then by config. For frame counts of two
// or more, each non-VBR packet is immediately followed by its VBR twin.
ScopedVector<OpusPacket> BuildAllOpusPackets() {
  ScopedVector<OpusPacket> packets;

  for (int count = kMinOpusPacketFrameCount; count <= kMaxOpusPacketFrameCount;
       ++count) {
    // VBR only alters the packet framing once there are at least two frames,
    // so single-frame packets get no VBR variant.
    const bool add_vbr_variant = count >= 2;

    for (int config = 0; config < kNumPossibleOpusConfigs; ++config) {
      packets.push_back(new OpusPacket(config, count, false));
      if (add_vbr_variant)
        packets.push_back(new OpusPacket(config, count, true));
    }
  }

  return packets.Pass();
}
} // namespace media
// Copyright (c) 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef MEDIA_FORMATS_WEBM_OPUS_PACKET_BUILDER_H_
#define MEDIA_FORMATS_WEBM_OPUS_PACKET_BUILDER_H_
#include <vector>
#include "base/memory/scoped_ptr.h"
#include "base/memory/scoped_vector.h"
namespace media {
// From Opus RFC. See https://tools.ietf.org/html/rfc6716#page-14
enum OpusConstants {
// Number of distinct config values; valid config numbers are
// [0, kNumPossibleOpusConfigs).
kNumPossibleOpusConfigs = 32,
// Inclusive lower and upper bounds on the number of frames an OpusPacket may
// be constructed with.
kMinOpusPacketFrameCount = 1,
kMaxOpusPacketFrameCount = 48
};
// Test helper that synthesizes the first bytes of an Opus packet (the TOC
// byte, an optional frame count byte, and one padding byte) and records the
// duration that those header bytes encode.
class OpusPacket {
public:
// |config| must be < kNumPossibleOpusConfigs and |frame_count| must lie in
// [kMinOpusPacketFrameCount, kMaxOpusPacketFrameCount]. |is_VBR| selects the
// variable-bitrate framing variant.
OpusPacket(uint8_t config, uint8_t frame_count, bool is_VBR);
~OpusPacket();
// Pointer to the first byte of the generated packet.
const uint8_t* data() const;
// Number of bytes readable through data().
int size() const;
// Duration encoded by the packet header, in milliseconds.
double duration_ms() const;
private:
// Generated packet bytes.
std::vector<uint8_t> data_;
// Frame count multiplied by the per-frame duration of the config, in ms.
double duration_ms_;
DISALLOW_COPY_AND_ASSIGN(OpusPacket);
};
// Builds an exhaustive collection of Opus packet configurations.
ScopedVector<OpusPacket> BuildAllOpusPackets();
} // namespace media
#endif // MEDIA_FORMATS_WEBM_OPUS_PACKET_BUILDER_H_
...@@ -17,6 +17,17 @@ ...@@ -17,6 +17,17 @@
namespace media { namespace media {
// Duration of a single frame, in microseconds, indexed by the config number
// taken from the top five bits of the Opus TOC byte. See
// https://tools.ietf.org/html/rfc6716#page-14
const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
    10000, 20000, 40000, 60000,  // configs 0-3
    10000, 20000, 40000, 60000,  // configs 4-7
    10000, 20000, 40000, 60000,  // configs 8-11
    10000, 20000,                // configs 12-13
    10000, 20000,                // configs 14-15
    2500,  5000,  10000, 20000,  // configs 16-19
    2500,  5000,  10000, 20000,  // configs 20-23
    2500,  5000,  10000, 20000,  // configs 24-27
    2500,  5000,  10000, 20000   // configs 28-31
};
enum {
// Limits the number of MEDIA_LOG() calls in the path of reading encoded
// duration to avoid spamming for corrupted data. Passed as the |max|
// argument of LIMITED_MEDIA_LOG() together with the |num_duration_errors_|
// counter.
kMaxDurationLogs = 10,
};
WebMClusterParser::WebMClusterParser( WebMClusterParser::WebMClusterParser(
int64 timecode_scale, int64 timecode_scale,
int audio_track_num, int audio_track_num,
...@@ -27,11 +38,14 @@ WebMClusterParser::WebMClusterParser( ...@@ -27,11 +38,14 @@ WebMClusterParser::WebMClusterParser(
const std::set<int64>& ignored_tracks, const std::set<int64>& ignored_tracks,
const std::string& audio_encryption_key_id, const std::string& audio_encryption_key_id,
const std::string& video_encryption_key_id, const std::string& video_encryption_key_id,
const AudioCodec audio_codec,
const LogCB& log_cb) const LogCB& log_cb)
: timecode_multiplier_(timecode_scale / 1000.0), : num_duration_errors_(0),
timecode_multiplier_(timecode_scale / 1000.0),
ignored_tracks_(ignored_tracks), ignored_tracks_(ignored_tracks),
audio_encryption_key_id_(audio_encryption_key_id), audio_encryption_key_id_(audio_encryption_key_id),
video_encryption_key_id_(video_encryption_key_id), video_encryption_key_id_(video_encryption_key_id),
audio_codec_(audio_codec),
parser_(kWebMIdCluster, this), parser_(kWebMIdCluster, this),
last_block_timecode_(-1), last_block_timecode_(-1),
block_data_size_(-1), block_data_size_(-1),
...@@ -68,7 +82,7 @@ void WebMClusterParser::Reset() { ...@@ -68,7 +82,7 @@ void WebMClusterParser::Reset() {
ready_buffer_upper_bound_ = kNoDecodeTimestamp(); ready_buffer_upper_bound_ = kNoDecodeTimestamp();
} }
int WebMClusterParser::Parse(const uint8* buf, int size) { int WebMClusterParser::Parse(const uint8_t* buf, int size) {
audio_.ClearReadyBuffers(); audio_.ClearReadyBuffers();
video_.ClearReadyBuffers(); video_.ClearReadyBuffers();
ClearTextTrackReadyBuffers(); ClearTextTrackReadyBuffers();
...@@ -140,6 +154,101 @@ WebMClusterParser::GetTextBuffers() { ...@@ -140,6 +154,101 @@ WebMClusterParser::GetTextBuffers() {
return text_buffers_map_; return text_buffers_map_;
} }
// Returns the duration encoded in the audio payload |data| of |size| bytes,
// or kNoTimestamp() when the codec is not handled here.
base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
    const uint8_t* data,
    int size) {
  // Duration is currently read assuming the *entire* stream is unencrypted.
  // The special "Signal Byte" prepended to Blocks in encrypted streams is
  // assumed to not be present.
  // TODO(chcunningham): Consider parsing "Signal Byte" for encrypted streams
  // to return duration for any unencrypted blocks.

  if (audio_codec_ != kCodecOpus) {
    // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See
    // motivations in http://crbug.com/396634.
    return kNoTimestamp();
  }

  return ReadOpusDuration(data, size);
}
// Derives the packet duration from the Opus header bytes in |data|. Returns
// kNoTimestamp() (after a rate-limited MEDIA_LOG) when the header is too short
// or declares an illegal frame count.
base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
                                                    int size) {
  // Bit masks for the TOC byte and the frame count byte, plus the longest
  // duration a packet is expected to have. See
  // https://tools.ietf.org/html/rfc6716#page-14
  static const uint8_t kTocConfigMask = 0xf8;
  static const uint8_t kTocFrameCountCodeMask = 0x03;
  static const uint8_t kFrameCountMask = 0x3f;
  static const base::TimeDelta kPacketDurationMax =
      base::TimeDelta::FromMilliseconds(120);

  if (size < 1) {
    LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
        << "Invalid zero-byte Opus packet; demuxed block duration may be "
           "imprecise.";
    return kNoTimestamp();
  }

  const uint8_t toc_byte = data[0];

  // The two least significant bits of the TOC byte select how the frame count
  // is expressed.
  const int count_code = toc_byte & kTocFrameCountCodeMask;
  int frame_count = 0;
  if (count_code == 0) {
    frame_count = 1;
  } else if (count_code == 1 || count_code == 2) {
    frame_count = 2;
  } else if (count_code == 3) {
    // Code 3: an arbitrary frame count is stored in the following byte.
    if (size < 2) {
      LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
          << "Second byte missing from 'Code 3' Opus packet; demuxed block "
             "duration may be imprecise.";
      return kNoTimestamp();
    }

    frame_count = data[1] & kFrameCountMask;

    if (frame_count == 0) {
      LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
          << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
             "block duration may be imprecise.";
      return kNoTimestamp();
    }
  } else {
    LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
        << "Unexpected Opus frame count type: " << count_code << "; "
        << "demuxed block duration may be imprecise.";
    return kNoTimestamp();
  }

  // The upper five bits of the TOC byte index the per-frame duration table.
  const int opus_config = (toc_byte & kTocConfigMask) >> 3;
  CHECK_GE(opus_config, 0);
  CHECK_LT(opus_config, static_cast<int>(arraysize(kOpusFrameDurationsMu)));

  DCHECK_GT(frame_count, 0);
  const base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
      kOpusFrameDurationsMu[opus_config] * frame_count);

  if (duration > kPacketDurationMax) {
    // Intentionally allowing packet to pass through for now. Decoder should
    // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case
    // things go sideways.
    LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
        << "Warning, demuxed Opus packet with encoded duration: " << duration
        << ". Should be no greater than " << kPacketDurationMax;
  }

  return duration;
}
WebMParserClient* WebMClusterParser::OnListStart(int id) { WebMParserClient* WebMClusterParser::OnListStart(int id) {
if (id == kWebMIdCluster) { if (id == kWebMIdCluster) {
cluster_timecode_ = -1; cluster_timecode_ = -1;
...@@ -205,9 +314,12 @@ bool WebMClusterParser::OnUInt(int id, int64 val) { ...@@ -205,9 +314,12 @@ bool WebMClusterParser::OnUInt(int id, int64 val) {
return true; return true;
} }
bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf, bool WebMClusterParser::ParseBlock(bool is_simple_block,
int size, const uint8* additional, const uint8_t* buf,
int additional_size, int duration, int size,
const uint8_t* additional,
int additional_size,
int duration,
int64 discard_padding) { int64 discard_padding) {
if (size < 4) if (size < 4)
return false; return false;
...@@ -233,14 +345,14 @@ bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf, ...@@ -233,14 +345,14 @@ bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
if (timecode & 0x8000) if (timecode & 0x8000)
timecode |= ~0xffff; timecode |= ~0xffff;
const uint8* frame_data = buf + 4; const uint8_t* frame_data = buf + 4;
int frame_size = size - (frame_data - buf); int frame_size = size - (frame_data - buf);
return OnBlock(is_simple_block, track_num, timecode, duration, flags, return OnBlock(is_simple_block, track_num, timecode, duration, flags,
frame_data, frame_size, additional, additional_size, frame_data, frame_size, additional, additional_size,
discard_padding); discard_padding);
} }
bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
switch (id) { switch (id) {
case kWebMIdSimpleBlock: case kWebMIdSimpleBlock:
return ParseBlock(true, data, size, NULL, 0, -1, 0); return ParseBlock(true, data, size, NULL, 0, -1, 0);
...@@ -251,7 +363,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { ...@@ -251,7 +363,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
"supported."; "supported.";
return false; return false;
} }
block_data_.reset(new uint8[size]); block_data_.reset(new uint8_t[size]);
memcpy(block_data_.get(), data, size); memcpy(block_data_.get(), data, size);
block_data_size_ = size; block_data_size_ = size;
return true; return true;
...@@ -271,7 +383,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { ...@@ -271,7 +383,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
// element's value in Big Endian format. This is done to mimic ffmpeg // element's value in Big Endian format. This is done to mimic ffmpeg
// demuxer's behavior. // demuxer's behavior.
block_additional_data_size_ = size + sizeof(block_add_id); block_additional_data_size_ = size + sizeof(block_add_id);
block_additional_data_.reset(new uint8[block_additional_data_size_]); block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
memcpy(block_additional_data_.get(), &block_add_id, memcpy(block_additional_data_.get(), &block_add_id,
sizeof(block_add_id)); sizeof(block_add_id));
memcpy(block_additional_data_.get() + 8, data, size); memcpy(block_additional_data_.get() + 8, data, size);
...@@ -294,12 +406,15 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { ...@@ -294,12 +406,15 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
} }
} }
bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, bool WebMClusterParser::OnBlock(bool is_simple_block,
int track_num,
int timecode, int timecode,
int block_duration, int block_duration,
int flags, int flags,
const uint8* data, int size, const uint8_t* data,
const uint8* additional, int additional_size, int size,
const uint8_t* additional,
int additional_size,
int64 discard_padding) { int64 discard_padding) {
DCHECK_GE(size, 0); DCHECK_GE(size, 0);
if (cluster_timecode_ == -1) { if (cluster_timecode_ == -1) {
...@@ -324,9 +439,13 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, ...@@ -324,9 +439,13 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
Track* track = NULL; Track* track = NULL;
StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO; StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
std::string encryption_key_id; std::string encryption_key_id;
base::TimeDelta encoded_duration = kNoTimestamp();
if (track_num == audio_.track_num()) { if (track_num == audio_.track_num()) {
track = &audio_; track = &audio_;
encryption_key_id = audio_encryption_key_id_; encryption_key_id = audio_encryption_key_id_;
if (encryption_key_id.empty()) {
encoded_duration = TryGetEncodedAudioDuration(data, size);
}
} else if (track_num == video_.track_num()) { } else if (track_num == video_.track_num()) {
track = &video_; track = &video_;
encryption_key_id = video_encryption_key_id_; encryption_key_id = video_encryption_key_id_;
...@@ -367,7 +486,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, ...@@ -367,7 +486,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
if (!encryption_key_id.empty() && if (!encryption_key_id.empty() &&
!WebMCreateDecryptConfig( !WebMCreateDecryptConfig(
data, size, data, size,
reinterpret_cast<const uint8*>(encryption_key_id.data()), reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
encryption_key_id.size(), encryption_key_id.size(),
&decrypt_config, &data_offset)) { &decrypt_config, &data_offset)) {
return false; return false;
...@@ -387,7 +506,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, ...@@ -387,7 +506,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
std::string id, settings, content; std::string id, settings, content;
WebMWebVTTParser::Parse(data, size, &id, &settings, &content); WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
std::vector<uint8> side_data; std::vector<uint8_t> side_data;
MakeSideData(id.begin(), id.end(), MakeSideData(id.begin(), id.end(),
settings.begin(), settings.end(), settings.begin(), settings.end(),
&side_data); &side_data);
...@@ -396,7 +515,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, ...@@ -396,7 +515,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
// type with remapped bytestream track numbers and allow multiple tracks as // type with remapped bytestream track numbers and allow multiple tracks as
// applicable. See https://crbug.com/341581. // applicable. See https://crbug.com/341581.
buffer = StreamParserBuffer::CopyFrom( buffer = StreamParserBuffer::CopyFrom(
reinterpret_cast<const uint8*>(content.data()), reinterpret_cast<const uint8_t*>(content.data()),
content.length(), content.length(),
&side_data[0], &side_data[0],
side_data.size(), side_data.size(),
...@@ -407,9 +526,47 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, ...@@ -407,9 +526,47 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
if (cluster_start_time_ == kNoTimestamp()) if (cluster_start_time_ == kNoTimestamp())
cluster_start_time_ = timestamp; cluster_start_time_ = timestamp;
base::TimeDelta block_duration_time_delta = kNoTimestamp();
if (block_duration >= 0) { if (block_duration >= 0) {
buffer->set_duration(base::TimeDelta::FromMicroseconds( block_duration_time_delta = base::TimeDelta::FromMicroseconds(
block_duration * timecode_multiplier_)); block_duration * timecode_multiplier_);
}
// Prefer encoded duration over BlockGroup->BlockDuration or
// TrackEntry->DefaultDuration when available. This layering violation is a
// workaround for http://crbug.com/396634, decreasing the likelihood of
// fall-back to rough estimation techniques for Blocks that lack a
// BlockDuration at the end of a cluster. Cross cluster durations are not
// feasible given flexibility of cluster ordering and MSE APIs. Duration
// estimation may still apply in cases of encryption and codecs for which
// we do not extract encoded duration. Within a cluster, estimates are applied
// as Block Timecode deltas, or once the whole cluster is parsed in the case
// of the last Block in the cluster. See Track::AddBuffer and
// ApplyDurationEstimateIfNeeded().
if (encoded_duration != kNoTimestamp()) {
DCHECK(encoded_duration != kInfiniteDuration());
DCHECK(encoded_duration > base::TimeDelta());
buffer->set_duration(encoded_duration);
DVLOG(3) << __FUNCTION__ << " : "
<< "Using encoded duration " << encoded_duration.InSecondsF();
if (block_duration_time_delta != kNoTimestamp()) {
base::TimeDelta duration_difference =
block_duration_time_delta - encoded_duration;
const auto kWarnDurationDiff =
base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
if (duration_difference.magnitude() > kWarnDurationDiff) {
LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
<< "BlockDuration "
<< "(" << block_duration_time_delta << ") "
<< "differs significantly from encoded duration "
<< "(" << encoded_duration << ").";
}
}
} else if (block_duration_time_delta != kNoTimestamp()) {
buffer->set_duration(block_duration_time_delta);
} else { } else {
DCHECK_NE(buffer_type, DemuxerStream::TEXT); DCHECK_NE(buffer_type, DemuxerStream::TEXT);
buffer->set_duration(track->default_duration()); buffer->set_duration(track->default_duration());
...@@ -549,7 +706,7 @@ void WebMClusterParser::Track::Reset() { ...@@ -549,7 +706,7 @@ void WebMClusterParser::Track::Reset() {
last_added_buffer_missing_duration_ = NULL; last_added_buffer_missing_duration_ = NULL;
} }
bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const { bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
// For now, assume that all blocks are keyframes for datatypes other than // For now, assume that all blocks are keyframes for datatypes other than
// video. This is a valid assumption for Vorbis, WebVTT, & Opus. // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
if (!is_video_) if (!is_video_)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <string> #include <string>
#include "base/memory/scoped_ptr.h" #include "base/memory/scoped_ptr.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/media_export.h" #include "media/base/media_export.h"
#include "media/base/media_log.h" #include "media/base/media_log.h"
#include "media/base/stream_parser.h" #include "media/base/stream_parser.h"
...@@ -28,13 +29,17 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient { ...@@ -28,13 +29,17 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
// Arbitrarily-chosen numbers to estimate the duration of a buffer if none is // Arbitrarily-chosen numbers to estimate the duration of a buffer if none is
// set and there is not enough information to get a better estimate. // set and there is not enough information to get a better estimate.
// TODO(wolenetz/acolwell): Parse audio codebook to determine missing audio
// frame durations. See http://crbug.com/351166.
enum { enum {
kDefaultAudioBufferDurationInMs = 23, // Common 1k samples @44.1kHz kDefaultAudioBufferDurationInMs = 23, // Common 1k samples @44.1kHz
kDefaultVideoBufferDurationInMs = 42 // Low 24fps to reduce stalls kDefaultVideoBufferDurationInMs = 42 // Low 24fps to reduce stalls
}; };
// Opus packets encode the duration and other parameters in the 5 most
// significant bits of the first byte. The index in this array corresponds
// to the duration of each frame of the packet in microseconds. See
// https://tools.ietf.org/html/rfc6716#page-14
static const uint16_t kOpusFrameDurationsMu[];
private: private:
// Helper class that manages per-track state. // Helper class that manages per-track state.
class Track { class Track {
...@@ -87,7 +92,7 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient { ...@@ -87,7 +92,7 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
// block is a keyframe. // block is a keyframe.
// |data| contains the bytes in the block. // |data| contains the bytes in the block.
// |size| indicates the number of bytes in |data|. // |size| indicates the number of bytes in |data|.
bool IsKeyframe(const uint8* data, int size) const; bool IsKeyframe(const uint8_t* data, int size) const;
base::TimeDelta default_duration() const { return default_duration_; } base::TimeDelta default_duration() const { return default_duration_; }
...@@ -143,6 +148,7 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient { ...@@ -143,6 +148,7 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
const std::set<int64>& ignored_tracks, const std::set<int64>& ignored_tracks,
const std::string& audio_encryption_key_id, const std::string& audio_encryption_key_id,
const std::string& video_encryption_key_id, const std::string& video_encryption_key_id,
const AudioCodec audio_codec_,
const LogCB& log_cb); const LogCB& log_cb);
~WebMClusterParser() override; ~WebMClusterParser() override;
...@@ -154,7 +160,7 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient { ...@@ -154,7 +160,7 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
// Returns -1 if the parse fails. // Returns -1 if the parse fails.
// Returns 0 if more data is needed. // Returns 0 if more data is needed.
// Returns the number of bytes parsed on success. // Returns the number of bytes parsed on success.
int Parse(const uint8* buf, int size); int Parse(const uint8_t* buf, int size);
base::TimeDelta cluster_start_time() const { return cluster_start_time_; } base::TimeDelta cluster_start_time() const { return cluster_start_time_; }
...@@ -194,14 +200,24 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient { ...@@ -194,14 +200,24 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
WebMParserClient* OnListStart(int id) override; WebMParserClient* OnListStart(int id) override;
bool OnListEnd(int id) override; bool OnListEnd(int id) override;
bool OnUInt(int id, int64 val) override; bool OnUInt(int id, int64 val) override;
bool OnBinary(int id, const uint8* data, int size) override; bool OnBinary(int id, const uint8_t* data, int size) override;
bool ParseBlock(bool is_simple_block, const uint8* buf, int size, bool ParseBlock(bool is_simple_block,
const uint8* additional, int additional_size, int duration, const uint8_t* buf,
int size,
const uint8_t* additional,
int additional_size,
int duration,
int64 discard_padding); int64 discard_padding);
bool OnBlock(bool is_simple_block, int track_num, int timecode, int duration, bool OnBlock(bool is_simple_block,
int flags, const uint8* data, int size, int track_num,
const uint8* additional, int additional_size, int timecode,
int duration,
int flags,
const uint8_t* data,
int size,
const uint8_t* additional,
int additional_size,
int64 discard_padding); int64 discard_padding);
// Resets the Track objects associated with each text track. // Resets the Track objects associated with each text track.
...@@ -227,21 +243,40 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient { ...@@ -227,21 +243,40 @@ class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
// if that track num is not a text track. // if that track num is not a text track.
Track* FindTextTrack(int track_num); Track* FindTextTrack(int track_num);
// Attempts to read the duration from the encoded audio data, returning as
// TimeDelta or kNoTimestamp() if duration cannot be retrieved. This obviously
// violates layering rules, but is useful for MSE to know duration in cases
// where it isn't explicitly given and cannot be calculated for Blocks at the
// end of a Cluster (the next Cluster in playback-order may not be the next
// Cluster we parse, so we can't simply use the delta of the first Block in
// the next Cluster). Avoid calling if encrypted; may produce unexpected
// output. See implementation for supported codecs.
base::TimeDelta TryGetEncodedAudioDuration(const uint8_t* data, int size);
// Reads Opus packet header to determine packet duration. Duration returned
// as TimeDelta or kNoTimestamp() upon failure to read duration from packet.
base::TimeDelta ReadOpusDuration(const uint8_t* data, int size);
// Tracks the number of MEDIA_LOGs made in process of reading encoded
// duration. Useful to prevent log spam.
int num_duration_errors_;
double timecode_multiplier_; // Multiplier used to convert timecodes into double timecode_multiplier_; // Multiplier used to convert timecodes into
// microseconds. // microseconds.
std::set<int64> ignored_tracks_; std::set<int64> ignored_tracks_;
std::string audio_encryption_key_id_; std::string audio_encryption_key_id_;
std::string video_encryption_key_id_; std::string video_encryption_key_id_;
const AudioCodec audio_codec_;
WebMListParser parser_; WebMListParser parser_;
int64 last_block_timecode_; int64 last_block_timecode_;
scoped_ptr<uint8[]> block_data_; scoped_ptr<uint8_t[]> block_data_;
int block_data_size_; int block_data_size_;
int64 block_duration_; int64 block_duration_;
int64 block_add_id_; int64 block_add_id_;
scoped_ptr<uint8[]> block_additional_data_; scoped_ptr<uint8_t[]> block_additional_data_;
// Must be 0 if |block_additional_data_| is null. Must be > 0 if // Must be 0 if |block_additional_data_| is null. Must be > 0 if
// |block_additional_data_| is NOT null. // |block_additional_data_| is NOT null.
int block_additional_data_size_; int block_additional_data_size_;
......
...@@ -4,11 +4,14 @@ ...@@ -4,11 +4,14 @@
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <vector>
#include "base/bind.h" #include "base/bind.h"
#include "base/logging.h" #include "base/logging.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/decrypt_config.h" #include "media/base/decrypt_config.h"
#include "media/formats/webm/cluster_builder.h" #include "media/formats/webm/cluster_builder.h"
#include "media/formats/webm/opus_packet_builder.h"
#include "media/formats/webm/webm_cluster_parser.h" #include "media/formats/webm/webm_cluster_parser.h"
#include "media/formats/webm/webm_constants.h" #include "media/formats/webm/webm_constants.h"
#include "testing/gmock/include/gmock/gmock.h" #include "testing/gmock/include/gmock/gmock.h"
...@@ -31,6 +34,8 @@ enum { ...@@ -31,6 +34,8 @@ enum {
kTestVideoFrameDefaultDurationInMs = 17 kTestVideoFrameDefaultDurationInMs = 17
}; };
// Test duration defaults must differ from parser estimation defaults to know
// which durations parser used when emitting buffers.
static_assert( static_assert(
static_cast<int>(kTestAudioFrameDefaultDurationInMs) != static_cast<int>(kTestAudioFrameDefaultDurationInMs) !=
static_cast<int>(WebMClusterParser::kDefaultAudioBufferDurationInMs), static_cast<int>(WebMClusterParser::kDefaultAudioBufferDurationInMs),
...@@ -49,21 +54,27 @@ struct BlockInfo { ...@@ -49,21 +54,27 @@ struct BlockInfo {
// this BlockGroup. The absolute value is used for parser verification. // this BlockGroup. The absolute value is used for parser verification.
// For simple blocks, this value must be non-negative, and is used only for // For simple blocks, this value must be non-negative, and is used only for
// parser verification. // parser verification.
int duration; double duration;
bool use_simple_block; bool use_simple_block;
// Default data will be used if no data given.
const uint8_t* data;
int data_length;
}; };
static const BlockInfo kDefaultBlockInfo[] = { static const BlockInfo kDefaultBlockInfo[] = {
{ kAudioTrackNum, 0, 23, true }, {kAudioTrackNum, 0, 23, true, NULL, 0},
{ kAudioTrackNum, 23, 23, true }, {kAudioTrackNum, 23, 23, true, NULL, 0},
{ kVideoTrackNum, 33, 34, true }, // Assumes not using DefaultDuration // Assumes not using DefaultDuration
{ kAudioTrackNum, 46, 23, true }, {kVideoTrackNum, 33, 34, true, NULL, 0},
{ kVideoTrackNum, 67, 33, false }, {kAudioTrackNum, 46, 23, true, NULL, 0},
{ kAudioTrackNum, 69, 23, false }, {kVideoTrackNum, 67, 33, false, NULL, 0},
{ kVideoTrackNum, 100, 33, false }, {kAudioTrackNum, 69, 23, false, NULL, 0},
{kVideoTrackNum, 100, 33, false, NULL, 0},
}; };
static const uint8 kEncryptedFrame[] = { static const uint8_t kEncryptedFrame[] = {
0x01, // Block is encrypted 0x01, // Block is encrypted
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 // IV 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 // IV
}; };
...@@ -74,27 +85,35 @@ static scoped_ptr<Cluster> CreateCluster(int timecode, ...@@ -74,27 +85,35 @@ static scoped_ptr<Cluster> CreateCluster(int timecode,
ClusterBuilder cb; ClusterBuilder cb;
cb.SetClusterTimecode(0); cb.SetClusterTimecode(0);
uint8_t kDefaultBlockData[] = { 0x00 };
for (int i = 0; i < block_count; i++) { for (int i = 0; i < block_count; i++) {
uint8 data[] = { 0x00 }; const uint8_t* data;
int data_length;
if (block_info[i].data != NULL) {
data = block_info[i].data;
data_length = block_info[i].data_length;
} else {
data = kDefaultBlockData;
data_length = sizeof(kDefaultBlockData);
}
if (block_info[i].use_simple_block) { if (block_info[i].use_simple_block) {
CHECK_GE(block_info[i].duration, 0); CHECK_GE(block_info[i].duration, 0);
cb.AddSimpleBlock(block_info[i].track_num, cb.AddSimpleBlock(block_info[i].track_num, block_info[i].timestamp, 0,
block_info[i].timestamp, data, data_length);
0, data, sizeof(data));
continue; continue;
} }
if (block_info[i].duration < 0) { if (block_info[i].duration < 0) {
cb.AddBlockGroupWithoutBlockDuration(block_info[i].track_num, cb.AddBlockGroupWithoutBlockDuration(block_info[i].track_num,
block_info[i].timestamp, block_info[i].timestamp, 0, data,
0, data, sizeof(data)); data_length);
continue; continue;
} }
cb.AddBlockGroup(block_info[i].track_num, cb.AddBlockGroup(block_info[i].track_num, block_info[i].timestamp,
block_info[i].timestamp, block_info[i].duration, 0, data, data_length);
block_info[i].duration,
0, data, sizeof(data));
} }
return cb.Finish(); return cb.Finish();
...@@ -161,7 +180,7 @@ static bool VerifyBuffers(const WebMClusterParser::BufferQueue& audio_buffers, ...@@ -161,7 +180,7 @@ static bool VerifyBuffers(const WebMClusterParser::BufferQueue& audio_buffers,
EXPECT_EQ(block_info[i].timestamp, buffer->timestamp().InMilliseconds()); EXPECT_EQ(block_info[i].timestamp, buffer->timestamp().InMilliseconds());
EXPECT_EQ(std::abs(block_info[i].duration), EXPECT_EQ(std::abs(block_info[i].duration),
buffer->duration().InMilliseconds()); buffer->duration().InMillisecondsF());
EXPECT_EQ(expected_type, buffer->type()); EXPECT_EQ(expected_type, buffer->type());
EXPECT_EQ(block_info[i].track_num, buffer->track_id()); EXPECT_EQ(block_info[i].track_num, buffer->track_id());
} }
...@@ -212,7 +231,7 @@ static bool VerifyTextBuffers( ...@@ -212,7 +231,7 @@ static bool VerifyTextBuffers(
const scoped_refptr<StreamParserBuffer> buffer = *buffer_iter++; const scoped_refptr<StreamParserBuffer> buffer = *buffer_iter++;
EXPECT_EQ(block_info.timestamp, buffer->timestamp().InMilliseconds()); EXPECT_EQ(block_info.timestamp, buffer->timestamp().InMilliseconds());
EXPECT_EQ(std::abs(block_info.duration), EXPECT_EQ(std::abs(block_info.duration),
buffer->duration().InMilliseconds()); buffer->duration().InMillisecondsF());
EXPECT_EQ(DemuxerStream::TEXT, buffer->type()); EXPECT_EQ(DemuxerStream::TEXT, buffer->type());
EXPECT_EQ(text_track_num, buffer->track_id()); EXPECT_EQ(text_track_num, buffer->track_id());
} }
...@@ -248,6 +267,7 @@ class WebMClusterParserTest : public testing::Test { ...@@ -248,6 +267,7 @@ class WebMClusterParserTest : public testing::Test {
std::set<int64>(), std::set<int64>(),
std::string(), std::string(),
std::string(), std::string(),
kUnknownAudioCodec,
LogCB())) {} LogCB())) {}
protected: protected:
...@@ -270,6 +290,7 @@ class WebMClusterParserTest : public testing::Test { ...@@ -270,6 +290,7 @@ class WebMClusterParserTest : public testing::Test {
std::set<int64>(), std::set<int64>(),
std::string(), std::string(),
std::string(), std::string(),
kUnknownAudioCodec,
LogCB())); LogCB()));
} }
...@@ -305,18 +326,19 @@ TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) { ...@@ -305,18 +326,19 @@ TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) {
std::set<int64>(), std::set<int64>(),
std::string(), std::string(),
std::string(), std::string(),
kUnknownAudioCodec,
LogCB())); LogCB()));
const BlockInfo kBlockInfo[] = { const BlockInfo kBlockInfo[] = {
{ kVideoTrackNum, 0, 33, true }, {kVideoTrackNum, 0, 33, true, NULL, 0},
{ kAudioTrackNum, 0, 23, false }, {kAudioTrackNum, 0, 23, false, NULL, 0},
{ kTextTrackNum, 10, 42, false }, {kTextTrackNum, 10, 42, false, NULL, 0},
{ kAudioTrackNum, 23, kTestAudioFrameDefaultDurationInMs, true }, {kAudioTrackNum, 23, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
{ kVideoTrackNum, 33, 33, true }, {kVideoTrackNum, 33, 33, true, NULL, 0},
{ kAudioTrackNum, 36, kTestAudioFrameDefaultDurationInMs, true }, {kAudioTrackNum, 36, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
{ kVideoTrackNum, 66, 33, true }, {kVideoTrackNum, 66, 33, true, NULL, 0},
{ kAudioTrackNum, 70, kTestAudioFrameDefaultDurationInMs, true }, {kAudioTrackNum, 70, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
{ kAudioTrackNum, 83, kTestAudioFrameDefaultDurationInMs, true }, {kAudioTrackNum, 83, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
}; };
const int kExpectedBuffersOnPartialCluster[] = { const int kExpectedBuffersOnPartialCluster[] = {
...@@ -407,7 +429,7 @@ TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) { ...@@ -407,7 +429,7 @@ TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) {
WebMClusterParser::BufferQueue video_buffers; WebMClusterParser::BufferQueue video_buffers;
const WebMClusterParser::BufferQueue no_text_buffers; const WebMClusterParser::BufferQueue no_text_buffers;
const uint8* data = cluster->data(); const uint8_t* data = cluster->data();
int size = cluster->size(); int size = cluster->size();
int default_parse_size = 3; int default_parse_size = 3;
int parse_size = std::min(default_parse_size, size); int parse_size = std::min(default_parse_size, size);
...@@ -444,12 +466,12 @@ TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) { ...@@ -444,12 +466,12 @@ TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) {
// one of these scenarios. // one of these scenarios.
TEST_F(WebMClusterParserTest, ParseBlockGroup) { TEST_F(WebMClusterParserTest, ParseBlockGroup) {
const BlockInfo kBlockInfo[] = { const BlockInfo kBlockInfo[] = {
{ kAudioTrackNum, 0, 23, false }, {kAudioTrackNum, 0, 23, false, NULL, 0},
{ kVideoTrackNum, 33, 34, false }, {kVideoTrackNum, 33, 34, false, NULL, 0},
}; };
int block_count = arraysize(kBlockInfo); int block_count = arraysize(kBlockInfo);
const uint8 kClusterData[] = { const uint8_t kClusterData[] = {
0x1F, 0x43, 0xB6, 0x75, 0x9B, // Cluster(size=27) 0x1F, 0x43, 0xB6, 0x75, 0x9B, // Cluster(size=27)
0xE7, 0x81, 0x00, // Timecode(size=1, value=0) 0xE7, 0x81, 0x00, // Timecode(size=1, value=0)
// BlockGroup with BlockDuration before Block. // BlockGroup with BlockDuration before Block.
...@@ -470,11 +492,11 @@ TEST_F(WebMClusterParserTest, ParseBlockGroup) { ...@@ -470,11 +492,11 @@ TEST_F(WebMClusterParserTest, ParseBlockGroup) {
TEST_F(WebMClusterParserTest, ParseSimpleBlockAndBlockGroupMixture) { TEST_F(WebMClusterParserTest, ParseSimpleBlockAndBlockGroupMixture) {
const BlockInfo kBlockInfo[] = { const BlockInfo kBlockInfo[] = {
{ kAudioTrackNum, 0, 23, true }, {kAudioTrackNum, 0, 23, true, NULL, 0},
{ kAudioTrackNum, 23, 23, false }, {kAudioTrackNum, 23, 23, false, NULL, 0},
{ kVideoTrackNum, 33, 34, true }, {kVideoTrackNum, 33, 34, true, NULL, 0},
{ kAudioTrackNum, 46, 23, false }, {kAudioTrackNum, 46, 23, false, NULL, 0},
{ kVideoTrackNum, 67, 33, false }, {kVideoTrackNum, 67, 33, false, NULL, 0},
}; };
int block_count = arraysize(kBlockInfo); int block_count = arraysize(kBlockInfo);
scoped_ptr<Cluster> cluster(CreateCluster(0, kBlockInfo, block_count)); scoped_ptr<Cluster> cluster(CreateCluster(0, kBlockInfo, block_count));
...@@ -497,24 +519,25 @@ TEST_F(WebMClusterParserTest, IgnoredTracks) { ...@@ -497,24 +519,25 @@ TEST_F(WebMClusterParserTest, IgnoredTracks) {
ignored_tracks, ignored_tracks,
std::string(), std::string(),
std::string(), std::string(),
kUnknownAudioCodec,
LogCB())); LogCB()));
const BlockInfo kInputBlockInfo[] = { const BlockInfo kInputBlockInfo[] = {
{ kAudioTrackNum, 0, 23, true }, {kAudioTrackNum, 0, 23, true, NULL, 0},
{ kAudioTrackNum, 23, 23, true }, {kAudioTrackNum, 23, 23, true, NULL, 0},
{ kVideoTrackNum, 33, 34, true }, {kVideoTrackNum, 33, 34, true, NULL, 0},
{ kTextTrackNum, 33, 99, true }, {kTextTrackNum, 33, 99, true, NULL, 0},
{ kAudioTrackNum, 46, 23, true }, {kAudioTrackNum, 46, 23, true, NULL, 0},
{ kVideoTrackNum, 67, 34, true }, {kVideoTrackNum, 67, 34, true, NULL, 0},
}; };
int input_block_count = arraysize(kInputBlockInfo); int input_block_count = arraysize(kInputBlockInfo);
const BlockInfo kOutputBlockInfo[] = { const BlockInfo kOutputBlockInfo[] = {
{ kAudioTrackNum, 0, 23, true }, {kAudioTrackNum, 0, 23, true, NULL, 0},
{ kAudioTrackNum, 23, 23, true }, {kAudioTrackNum, 23, 23, true, NULL, 0},
{ kVideoTrackNum, 33, 34, true }, {kVideoTrackNum, 33, 34, true, NULL, 0},
{ kAudioTrackNum, 46, 23, true }, {kAudioTrackNum, 46, 23, true, NULL, 0},
{ kVideoTrackNum, 67, 34, true }, {kVideoTrackNum, 67, 34, true, NULL, 0},
}; };
int output_block_count = arraysize(kOutputBlockInfo); int output_block_count = arraysize(kOutputBlockInfo);
...@@ -542,16 +565,17 @@ TEST_F(WebMClusterParserTest, ParseTextTracks) { ...@@ -542,16 +565,17 @@ TEST_F(WebMClusterParserTest, ParseTextTracks) {
std::set<int64>(), std::set<int64>(),
std::string(), std::string(),
std::string(), std::string(),
kUnknownAudioCodec,
LogCB())); LogCB()));
const BlockInfo kInputBlockInfo[] = { const BlockInfo kInputBlockInfo[] = {
{ kAudioTrackNum, 0, 23, true }, {kAudioTrackNum, 0, 23, true, NULL, 0},
{ kAudioTrackNum, 23, 23, true }, {kAudioTrackNum, 23, 23, true, NULL, 0},
{ kVideoTrackNum, 33, 34, true }, {kVideoTrackNum, 33, 34, true, NULL, 0},
{ kTextTrackNum, 33, 42, false }, {kTextTrackNum, 33, 42, false, NULL, 0},
{ kAudioTrackNum, 46, 23, true }, {kAudioTrackNum, 46, 23, true, NULL, 0},
{ kTextTrackNum, 55, 44, false }, {kTextTrackNum, 55, 44, false, NULL, 0},
{ kVideoTrackNum, 67, 34, true }, {kVideoTrackNum, 67, 34, true, NULL, 0},
}; };
int input_block_count = arraysize(kInputBlockInfo); int input_block_count = arraysize(kInputBlockInfo);
...@@ -579,6 +603,7 @@ TEST_F(WebMClusterParserTest, TextTracksSimpleBlock) { ...@@ -579,6 +603,7 @@ TEST_F(WebMClusterParserTest, TextTracksSimpleBlock) {
std::set<int64>(), std::set<int64>(),
std::string(), std::string(),
std::string(), std::string(),
kUnknownAudioCodec,
LogCB())); LogCB()));
const BlockInfo kInputBlockInfo[] = { const BlockInfo kInputBlockInfo[] = {
...@@ -616,17 +641,18 @@ TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) { ...@@ -616,17 +641,18 @@ TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) {
std::set<int64>(), std::set<int64>(),
std::string(), std::string(),
std::string(), std::string(),
kUnknownAudioCodec,
LogCB())); LogCB()));
const BlockInfo kInputBlockInfo[] = { const BlockInfo kInputBlockInfo[] = {
{ kAudioTrackNum, 0, 23, true }, {kAudioTrackNum, 0, 23, true, NULL, 0},
{ kAudioTrackNum, 23, 23, true }, {kAudioTrackNum, 23, 23, true, NULL, 0},
{ kVideoTrackNum, 33, 34, true }, {kVideoTrackNum, 33, 34, true, NULL, 0},
{ kSubtitleTextTrackNum, 33, 42, false }, {kSubtitleTextTrackNum, 33, 42, false, NULL, 0},
{ kAudioTrackNum, 46, 23, true }, {kAudioTrackNum, 46, 23, true, NULL, 0},
{ kCaptionTextTrackNum, 55, 44, false }, {kCaptionTextTrackNum, 55, 44, false, NULL, 0},
{ kVideoTrackNum, 67, 34, true }, {kVideoTrackNum, 67, 34, true, NULL, 0},
{ kSubtitleTextTrackNum, 67, 33, false }, {kSubtitleTextTrackNum, 67, 33, false, NULL, 0},
}; };
int input_block_count = arraysize(kInputBlockInfo); int input_block_count = arraysize(kInputBlockInfo);
...@@ -662,6 +688,7 @@ TEST_F(WebMClusterParserTest, ParseEncryptedBlock) { ...@@ -662,6 +688,7 @@ TEST_F(WebMClusterParserTest, ParseEncryptedBlock) {
std::set<int64>(), std::set<int64>(),
std::string(), std::string(),
"video_key_id", "video_key_id",
kUnknownAudioCodec,
LogCB())); LogCB()));
int result = parser_->Parse(cluster->data(), cluster->size()); int result = parser_->Parse(cluster->data(), cluster->size());
EXPECT_EQ(cluster->size(), result); EXPECT_EQ(cluster->size(), result);
...@@ -683,13 +710,14 @@ TEST_F(WebMClusterParserTest, ParseBadEncryptedBlock) { ...@@ -683,13 +710,14 @@ TEST_F(WebMClusterParserTest, ParseBadEncryptedBlock) {
std::set<int64>(), std::set<int64>(),
std::string(), std::string(),
"video_key_id", "video_key_id",
kUnknownAudioCodec,
LogCB())); LogCB()));
int result = parser_->Parse(cluster->data(), cluster->size()); int result = parser_->Parse(cluster->data(), cluster->size());
EXPECT_EQ(-1, result); EXPECT_EQ(-1, result);
} }
TEST_F(WebMClusterParserTest, ParseInvalidZeroSizedCluster) { TEST_F(WebMClusterParserTest, ParseInvalidZeroSizedCluster) {
const uint8 kBuffer[] = { const uint8_t kBuffer[] = {
0x1F, 0x43, 0xB6, 0x75, 0x80, // CLUSTER (size = 0) 0x1F, 0x43, 0xB6, 0x75, 0x80, // CLUSTER (size = 0)
}; };
...@@ -697,7 +725,7 @@ TEST_F(WebMClusterParserTest, ParseInvalidZeroSizedCluster) { ...@@ -697,7 +725,7 @@ TEST_F(WebMClusterParserTest, ParseInvalidZeroSizedCluster) {
} }
TEST_F(WebMClusterParserTest, ParseInvalidUnknownButActuallyZeroSizedCluster) { TEST_F(WebMClusterParserTest, ParseInvalidUnknownButActuallyZeroSizedCluster) {
const uint8 kBuffer[] = { const uint8_t kBuffer[] = {
0x1F, 0x43, 0xB6, 0x75, 0xFF, // CLUSTER (size = "unknown") 0x1F, 0x43, 0xB6, 0x75, 0xFF, // CLUSTER (size = "unknown")
0x1F, 0x43, 0xB6, 0x75, 0x85, // CLUSTER (size = 5) 0x1F, 0x43, 0xB6, 0x75, 0x85, // CLUSTER (size = 5)
}; };
...@@ -722,6 +750,7 @@ TEST_F(WebMClusterParserTest, ParseInvalidTextBlockGroupWithoutDuration) { ...@@ -722,6 +750,7 @@ TEST_F(WebMClusterParserTest, ParseInvalidTextBlockGroupWithoutDuration) {
std::set<int64>(), std::set<int64>(),
std::string(), std::string(),
std::string(), std::string(),
kUnknownAudioCodec,
LogCB())); LogCB()));
const BlockInfo kBlockInfo[] = { const BlockInfo kBlockInfo[] = {
...@@ -741,13 +770,13 @@ TEST_F(WebMClusterParserTest, ParseWithDefaultDurationsSimpleBlocks) { ...@@ -741,13 +770,13 @@ TEST_F(WebMClusterParserTest, ParseWithDefaultDurationsSimpleBlocks) {
EXPECT_LT(kTestVideoFrameDefaultDurationInMs, 33); EXPECT_LT(kTestVideoFrameDefaultDurationInMs, 33);
const BlockInfo kBlockInfo[] = { const BlockInfo kBlockInfo[] = {
{ kAudioTrackNum, 0, kTestAudioFrameDefaultDurationInMs, true }, {kAudioTrackNum, 0, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
{ kAudioTrackNum, 23, kTestAudioFrameDefaultDurationInMs, true }, {kAudioTrackNum, 23, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
{ kVideoTrackNum, 33, kTestVideoFrameDefaultDurationInMs, true }, {kVideoTrackNum, 33, kTestVideoFrameDefaultDurationInMs, true, NULL, 0},
{ kAudioTrackNum, 46, kTestAudioFrameDefaultDurationInMs, true }, {kAudioTrackNum, 46, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
{ kVideoTrackNum, 67, kTestVideoFrameDefaultDurationInMs, true }, {kVideoTrackNum, 67, kTestVideoFrameDefaultDurationInMs, true, NULL, 0},
{ kAudioTrackNum, 69, kTestAudioFrameDefaultDurationInMs, true }, {kAudioTrackNum, 69, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
{ kVideoTrackNum, 100, kTestVideoFrameDefaultDurationInMs, true }, {kVideoTrackNum, 100, kTestVideoFrameDefaultDurationInMs, true, NULL, 0},
}; };
int block_count = arraysize(kBlockInfo); int block_count = arraysize(kBlockInfo);
...@@ -778,13 +807,15 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) { ...@@ -778,13 +807,15 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) {
// as the lowest non-zero duration seen so far if the last buffer in the track // as the lowest non-zero duration seen so far if the last buffer in the track
// in the cluster (independently for each track in the cluster). // in the cluster (independently for each track in the cluster).
const BlockInfo kBlockInfo1[] = { const BlockInfo kBlockInfo1[] = {
{ kAudioTrackNum, 0, 23, true }, {kAudioTrackNum, 0, 23, true, NULL, 0},
{ kAudioTrackNum, 23, 22, true }, {kAudioTrackNum, 23, 22, true, NULL, 0},
{ kVideoTrackNum, 33, 33, true }, {kVideoTrackNum, 33, 33, true, NULL, 0},
{ kAudioTrackNum, 45, 23, true }, {kAudioTrackNum, 45, 23, true, NULL, 0},
{ kVideoTrackNum, 66, 34, true }, {kVideoTrackNum, 66, 34, true, NULL, 0},
{ kAudioTrackNum, 68, 22, true }, // Estimated from minimum audio dur // Estimated from minimum audio dur
{ kVideoTrackNum, 100, 33, true }, // Estimated from minimum video dur {kAudioTrackNum, 68, 22, true, NULL, 0},
// Estimated from minimum video dur
{kVideoTrackNum, 100, 33, true, NULL, 0},
}; };
int block_count1 = arraysize(kBlockInfo1); int block_count1 = arraysize(kBlockInfo1);
...@@ -812,8 +843,10 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) { ...@@ -812,8 +843,10 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) {
// Verify that the estimated frame duration is tracked across clusters for // Verify that the estimated frame duration is tracked across clusters for
// each track. // each track.
const BlockInfo kBlockInfo2[] = { const BlockInfo kBlockInfo2[] = {
{ kAudioTrackNum, 200, 22, true }, // Estimate carries over across clusters // Estimate carries over across clusters
{ kVideoTrackNum, 201, 33, true }, // Estimate carries over across clusters {kAudioTrackNum, 200, 22, true, NULL, 0},
// Estimate carries over across clusters
{kVideoTrackNum, 201, 33, true, NULL, 0},
}; };
int block_count2 = arraysize(kBlockInfo2); int block_count2 = arraysize(kBlockInfo2);
...@@ -832,13 +865,15 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) { ...@@ -832,13 +865,15 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) {
// the last buffer in the track in the cluster (independently for each track // the last buffer in the track in the cluster (independently for each track
// in the cluster). // in the cluster).
const BlockInfo kBlockInfo1[] = { const BlockInfo kBlockInfo1[] = {
{ kAudioTrackNum, 0, -23, false }, {kAudioTrackNum, 0, -23, false, NULL, 0},
{ kAudioTrackNum, 23, -22, false }, {kAudioTrackNum, 23, -22, false, NULL, 0},
{ kVideoTrackNum, 33, -33, false }, {kVideoTrackNum, 33, -33, false, NULL, 0},
{ kAudioTrackNum, 45, -23, false }, {kAudioTrackNum, 45, -23, false, NULL, 0},
{ kVideoTrackNum, 66, -34, false }, {kVideoTrackNum, 66, -34, false, NULL, 0},
{ kAudioTrackNum, 68, -22, false }, // Estimated from minimum audio dur // Estimated from minimum audio dur
{ kVideoTrackNum, 100, -33, false }, // Estimated from minimum video dur {kAudioTrackNum, 68, -22, false, NULL, 0},
// Estimated from minimum video dur
{kVideoTrackNum, 100, -33, false, NULL, 0},
}; };
int block_count1 = arraysize(kBlockInfo1); int block_count1 = arraysize(kBlockInfo1);
...@@ -866,8 +901,8 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) { ...@@ -866,8 +901,8 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) {
// Verify that the estimated frame duration is tracked across clusters for // Verify that the estimated frame duration is tracked across clusters for
// each track. // each track.
const BlockInfo kBlockInfo2[] = { const BlockInfo kBlockInfo2[] = {
{ kAudioTrackNum, 200, -22, false }, {kAudioTrackNum, 200, -22, false, NULL, 0},
{ kVideoTrackNum, 201, -33, false }, {kVideoTrackNum, 201, -33, false, NULL, 0},
}; };
int block_count2 = arraysize(kBlockInfo2); int block_count2 = arraysize(kBlockInfo2);
...@@ -887,13 +922,18 @@ TEST_F(WebMClusterParserTest, ...@@ -887,13 +922,18 @@ TEST_F(WebMClusterParserTest,
EXPECT_LT(kTestVideoFrameDefaultDurationInMs, 33); EXPECT_LT(kTestVideoFrameDefaultDurationInMs, 33);
const BlockInfo kBlockInfo[] = { const BlockInfo kBlockInfo[] = {
{ kAudioTrackNum, 0, -kTestAudioFrameDefaultDurationInMs, false }, {kAudioTrackNum, 0, -kTestAudioFrameDefaultDurationInMs, false, NULL, 0},
{ kAudioTrackNum, 23, -kTestAudioFrameDefaultDurationInMs, false }, {kAudioTrackNum, 23, -kTestAudioFrameDefaultDurationInMs, false, NULL, 0},
{ kVideoTrackNum, 33, -kTestVideoFrameDefaultDurationInMs, false }, {kVideoTrackNum, 33, -kTestVideoFrameDefaultDurationInMs, false, NULL, 0},
{ kAudioTrackNum, 46, -kTestAudioFrameDefaultDurationInMs, false }, {kAudioTrackNum, 46, -kTestAudioFrameDefaultDurationInMs, false, NULL, 0},
{ kVideoTrackNum, 67, -kTestVideoFrameDefaultDurationInMs, false }, {kVideoTrackNum, 67, -kTestVideoFrameDefaultDurationInMs, false, NULL, 0},
{ kAudioTrackNum, 69, -kTestAudioFrameDefaultDurationInMs, false }, {kAudioTrackNum, 69, -kTestAudioFrameDefaultDurationInMs, false, NULL, 0},
{ kVideoTrackNum, 100, -kTestVideoFrameDefaultDurationInMs, false }, {kVideoTrackNum,
100,
-kTestVideoFrameDefaultDurationInMs,
false,
NULL,
0},
}; };
int block_count = arraysize(kBlockInfo); int block_count = arraysize(kBlockInfo);
...@@ -954,4 +994,98 @@ TEST_F(WebMClusterParserTest, ...@@ -954,4 +994,98 @@ TEST_F(WebMClusterParserTest,
ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count));
} }
// For an Opus audio track, builds a cluster holding a single SimpleBlock whose
// payload is an Opus packet, parses it, and verifies the emitted buffer
// against a BlockInfo whose duration is the packet's own duration_ms().
// Repeats for every packet returned by BuildAllOpusPackets().
TEST_F(WebMClusterParserTest, ReadOpusDurationsSimpleBlockAtEndOfCluster) {
  // Swap in a parser that is told the audio codec is Opus.
  parser_.reset(new WebMClusterParser(
      kTimecodeScale, kAudioTrackNum, kNoTimestamp(), kVideoTrackNum,
      kNoTimestamp(), TextTracks(), std::set<int64>(), std::string(),
      std::string(), kCodecOpus, LogCB()));

  int packets_checked = 0;
  for (const auto* opus_packet : BuildAllOpusPackets()) {
    // A lone SimpleBlock at timestamp 0 carrying the Opus packet bytes.
    const BlockInfo kSimpleBlock[] = {{kAudioTrackNum,
                                       0,
                                       opus_packet->duration_ms(),
                                       true,  // Emit as a SimpleBlock.
                                       opus_packet->data(),
                                       opus_packet->size()}};
    const int simple_block_count = arraysize(kSimpleBlock);

    scoped_ptr<Cluster> cluster(
        CreateCluster(0, kSimpleBlock, simple_block_count));

    // The whole cluster must be consumed by a single Parse() call.
    const int bytes_parsed = parser_->Parse(cluster->data(), cluster->size());
    EXPECT_EQ(cluster->size(), bytes_parsed);

    ASSERT_TRUE(VerifyBuffers(parser_, kSimpleBlock, simple_block_count));
    ++packets_checked;
  }

  // Guard against the packet builder silently producing too few packets: at
  // least one iteration per (config, frame count) combination is required.
  ASSERT_GE(packets_checked,
            kNumPossibleOpusConfigs * kMaxOpusPacketFrameCount);
}
// For an Opus audio track, builds a BlockGroup whose BlockDuration
// deliberately disagrees with the duration of the Opus packet it carries, then
// verifies the emitted buffer against the packet's duration rather than the
// BlockDuration. Repeats for every packet returned by BuildAllOpusPackets().
TEST_F(WebMClusterParserTest, PreferOpusDurationsOverBlockDurations) {
  // Swap in a parser that is told the audio codec is Opus.
  parser_.reset(new WebMClusterParser(
      kTimecodeScale, kAudioTrackNum, kNoTimestamp(), kVideoTrackNum,
      kNoTimestamp(), TextTracks(), std::set<int64>(), std::string(),
      std::string(), kCodecOpus, LogCB()));

  int packets_checked = 0;
  for (const auto* opus_packet : BuildAllOpusPackets()) {
    // Pick a BlockDuration that cannot match the packet's own duration, so the
    // buffer duration reveals which source the parser used.
    // NOTE(review): the sum is stored in an int; if duration_ms() returns a
    // fractional value it is truncated here - confirm this is intended.
    int mismatched_duration_ms = opus_packet->duration_ms() + 10;

    BlockInfo group_block[] = {{kAudioTrackNum,
                                0,
                                mismatched_duration_ms,
                                false,  // Emit inside a BlockGroup.
                                opus_packet->data(),
                                opus_packet->size()}};
    const int group_block_count = arraysize(group_block);

    scoped_ptr<Cluster> cluster(
        CreateCluster(0, group_block, group_block_count));

    // The whole cluster must be consumed by a single Parse() call.
    const int bytes_parsed = parser_->Parse(cluster->data(), cluster->size());
    EXPECT_EQ(cluster->size(), bytes_parsed);

    // VerifyBuffers() checks buffer durations against BlockInfo::duration, so
    // overwrite the expectation with the Opus packet's duration before
    // verifying; a match shows the encoded duration was preferred.
    group_block[0].duration = opus_packet->duration_ms();
    ASSERT_TRUE(VerifyBuffers(parser_, group_block, group_block_count));
    ++packets_checked;
  }

  // Guard against the packet builder silently producing too few packets: at
  // least one iteration per (config, frame count) combination is required.
  ASSERT_GE(packets_checked,
            kNumPossibleOpusConfigs * kMaxOpusPacketFrameCount);
}
// When the audio track is encrypted, the duration encoded inside the Opus
// packet cannot be read, so the buffer duration must come from BlockDuration
// (not from the encoded Opus duration and not from a hard coded estimate).
// For unencrypted Opus the encoded duration is normally the preferred source.
TEST_F(WebMClusterParserTest, DontReadEncodedDurationWhenEncrypted) {
  // Any non-empty key id marks the audio track as encrypted.
  const std::string audio_key_id("audio_key_id");

  // Swap in a parser for Opus audio that uses the audio encryption key id.
  parser_.reset(new WebMClusterParser(
      kTimecodeScale, kAudioTrackNum, kNoTimestamp(), kVideoTrackNum,
      kNoTimestamp(), TextTracks(), std::set<int64>(), audio_key_id,
      std::string(), kCodecOpus, LogCB()));

  // One BlockGroup with an explicit BlockDuration and an encrypted payload.
  const BlockInfo kEncryptedBlock[] = {{kAudioTrackNum,
                                        0,
                                        kTestAudioFrameDefaultDurationInMs,
                                        false,            // In a BlockGroup.
                                        kEncryptedFrame,  // Encrypted payload.
                                        arraysize(kEncryptedFrame)}};
  const int encrypted_block_count = arraysize(kEncryptedBlock);

  scoped_ptr<Cluster> cluster(
      CreateCluster(0, kEncryptedBlock, encrypted_block_count));

  // The whole cluster must be consumed by a single Parse() call.
  const int bytes_parsed = parser_->Parse(cluster->data(), cluster->size());
  EXPECT_EQ(cluster->size(), bytes_parsed);

  // The expected duration in kEncryptedBlock is the BlockDuration, so a
  // successful verification shows the buffer took its duration from there.
  ASSERT_TRUE(VerifyBuffers(parser_, kEncryptedBlock, encrypted_block_count));
}
} // namespace media } // namespace media
...@@ -240,6 +240,7 @@ int WebMStreamParser::ParseInfoAndTracks(const uint8* data, int size) { ...@@ -240,6 +240,7 @@ int WebMStreamParser::ParseInfoAndTracks(const uint8* data, int size) {
tracks_parser.ignored_tracks(), tracks_parser.ignored_tracks(),
tracks_parser.audio_encryption_key_id(), tracks_parser.audio_encryption_key_id(),
tracks_parser.video_encryption_key_id(), tracks_parser.video_encryption_key_id(),
audio_config.codec(),
log_cb_)); log_cb_));
if (!init_cb_.is_null()) if (!init_cb_.is_null())
......
...@@ -1257,6 +1257,8 @@ ...@@ -1257,6 +1257,8 @@
'formats/common/offset_byte_queue_unittest.cc', 'formats/common/offset_byte_queue_unittest.cc',
'formats/webm/cluster_builder.cc', 'formats/webm/cluster_builder.cc',
'formats/webm/cluster_builder.h', 'formats/webm/cluster_builder.h',
'formats/webm/opus_packet_builder.cc',
'formats/webm/opus_packet_builder.h',
'formats/webm/tracks_builder.cc', 'formats/webm/tracks_builder.cc',
'formats/webm/tracks_builder.h', 'formats/webm/tracks_builder.h',
'formats/webm/webm_cluster_parser_unittest.cc', 'formats/webm/webm_cluster_parser_unittest.cc',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment