Fix muxed MP4 parsing so it won't crash on partial media segment appends.

Partial appends could cause the MP4StreamParser to emit buffers in non-monotonically increasing timestamp order because of how we process trun boxes. This patch makes sure that processing of trun boxes is deferred until we have all the sample & aux_info data before emitting samples. This prevents the parser from emitting samples in such a way that will break downstream code. This is a minimal impact fix to avoid bad behavior at the expense of buffering more data. BUG=None TEST=All MP4StreamParserTests that do partial appends now verify the fix. Review URL: https://codereview.chromium.org/348623003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@278650 0039d316-1c4b-4281-b951-d872f2087c98

Fix muxed MP4 parsing so it won't crash on partial media segment appends.
Partial appends could cause the MP4StreamParser to emit buffers in non-monotonically increasing timestamp order because of how we process trun boxes. This patch makes sure that processing of trun boxes is deferred until we have all the sample & aux_info data before emitting samples. This prevents the parser from emitting samples in such a way that will break downstream code. This is a minimal impact fix to avoid bad behavior at the expense of buffering more data. BUG=None TEST=All MP4StreamParserTests that do partial appends now verify the fix. Review URL: https://codereview.chromium.org/348623003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@278650 0039d316-1c4b-4281-b951-d872f2087c98
d749be45 · acolwell@chromium.org · fc43f5a2 · d749be45 · d749be45 · d749be45
Commit d749be45 authored Jun 20, 2014 by acolwell@chromium.org
3 changed files
--- a/media/formats/mp4/mp4_stream_parser.cc
+++ b/media/formats/mp4/mp4_stream_parser.cc
@@ -92,18 +92,33 @@ bool MP4StreamParser::Parse(const uint8* buf, int size) {
  BufferQueue audio_buffers;
  BufferQueue video_buffers;

-  bool result, err = false;
+  bool result = false;
+  bool err = false;

  do {
-    if (state_ == kParsingBoxes) {
-      result = ParseBox(&err);
-    } else {
-      DCHECK_EQ(kEmittingSamples, state_);
-      result = EnqueueSample(&audio_buffers, &video_buffers, &err);
-      if (result) {
-        int64 max_clear = runs_->GetMaxClearOffset() + moof_head_;
-        err = !ReadAndDiscardMDATsUntil(max_clear);
-      }
+    switch (state_) {
+      case kWaitingForInit:
+      case kError:
+        NOTREACHED();
+        return false;
+
+      case kParsingBoxes:
+        result = ParseBox(&err);
+        break;
+
+      case kWaitingForSampleData:
+        result = HaveEnoughDataToEnqueueSamples();
+        if (result)
+          ChangeState(kEmittingSamples);
+        break;
+
+      case kEmittingSamples:
+        result = EnqueueSample(&audio_buffers, &video_buffers, &err);
+        if (result) {
+          int64 max_clear = runs_->GetMaxClearOffset() + moof_head_;
+          err = !ReadAndDiscardMDATsUntil(max_clear);
+        }
+        break;
    }
  } while (result && !err);

@@ -312,9 +327,10 @@ bool MP4StreamParser::ParseMoof(BoxReader* reader) {
  if (!runs_)
    runs_.reset(new TrackRunIterator(moov_.get(), log_cb_));
  RCHECK(runs_->Init(moof));
+  RCHECK(ComputeHighestEndOffset(moof));
  EmitNeedKeyIfNecessary(moof.pssh);
  new_segment_cb_.Run();
-  ChangeState(kEmittingSamples);
+  ChangeState(kWaitingForSampleData);
  return true;
 }

@@ -393,6 +409,8 @@ bool MP4StreamParser::PrepareAACBuffer(
 bool MP4StreamParser::EnqueueSample(BufferQueue* audio_buffers,
                                    BufferQueue* video_buffers,
                                    bool* err) {
+  DCHECK_EQ(state_, kEmittingSamples);
+
  if (!runs_->IsRunValid()) {
    // Flush any buffers we've gotten in this chunk so that buffers don't
    // cross NewSegment() calls
@@ -400,7 +418,7 @@ bool MP4StreamParser::EnqueueSample(BufferQueue* audio_buffers,
    if (*err)
      return false;

-    // Remain in kEnqueueingSamples state, discarding data, until the end of
+    // Remain in kEmittingSamples state, discarding data, until the end of
    // the current 'mdat' box has been appended to the queue.
    if (!queue_.Trim(mdat_tail_))
      return false;
@@ -426,8 +444,10 @@ bool MP4StreamParser::EnqueueSample(BufferQueue* audio_buffers,
  bool video = has_video_ && video_track_id_ == runs_->track_id();

  // Skip this entire track if it's not one we're interested in
-  if (!audio && !video)
+  if (!audio && !video) {
    runs_->AdvanceRun();
+    return true;
+  }

  // Attempt to cache the auxiliary information first. Aux info is usually
  // placed in a contiguous block before the sample data, rather than being
@@ -577,5 +597,40 @@ void MP4StreamParser::ChangeState(State new_state) {
  state_ = new_state;
 }

+bool MP4StreamParser::HaveEnoughDataToEnqueueSamples() {
+  DCHECK_EQ(state_, kWaitingForSampleData);
+  // For muxed content, require data up to |highest_end_offset_| (an offset
+  // relative to |moof_head_|) so all tracks' samples can be emitted in order.
+  // Otherwise assume we have enough; per sample offset checks meter enqueuing.
+  // TODO(acolwell): Fix trun box handling so we don't have to special case
+  // muxed content.
+  return !(has_audio_ && has_video_ &&
+           queue_.tail() < highest_end_offset_ + moof_head_);
+}
+
+bool MP4StreamParser::ComputeHighestEndOffset(const MovieFragment& moof) {
+  highest_end_offset_ = 0;  // Running max of aux info/sample end offsets.
+
+  TrackRunIterator runs(moov_.get(), log_cb_);  // Local; |runs_| is untouched.
+  RCHECK(runs.Init(moof));
+
+  while (runs.IsRunValid()) {  // One pass per track run.
+    int64 aux_info_end_offset = runs.aux_info_offset() + runs.aux_info_size();
+    if (aux_info_end_offset > highest_end_offset_)
+      highest_end_offset_ = aux_info_end_offset;
+
+    while (runs.IsSampleValid()) {  // Fold in each sample's end offset.
+      int64 sample_end_offset = runs.sample_offset() + runs.sample_size();
+      if (sample_end_offset > highest_end_offset_)
+        highest_end_offset_ = sample_end_offset;
+
+      runs.AdvanceSample();
+    }
+    runs.AdvanceRun();
+  }
+
+  return true;
+}
+
 }  // namespace mp4
 }  // namespace media
--- a/media/formats/mp4/mp4_stream_parser.h
+++ b/media/formats/mp4/mp4_stream_parser.h
@@ -42,6 +42,7 @@ class MEDIA_EXPORT MP4StreamParser : public StreamParser {
  enum State {
    kWaitingForInit,
    kParsingBoxes,
+    kWaitingForSampleData,
    kEmittingSamples,
    kError
  };
@@ -78,6 +79,15 @@ class MEDIA_EXPORT MP4StreamParser : public StreamParser {

  void Reset();

+  // Checks to see if we have enough data in |queue_| to transition to
+  // kEmittingSamples and start enqueuing samples.
+  bool HaveEnoughDataToEnqueueSamples();
+
+  // Sets |highest_end_offset_| based on the data in |moov_|
+  // and |moof|. Returns true if |highest_end_offset_| was successfully
+  // computed.
+  bool ComputeHighestEndOffset(const MovieFragment& moof);
+
  State state_;
  InitCB init_cb_;
  NewConfigCB config_cb_;
@@ -99,6 +109,11 @@ class MEDIA_EXPORT MP4StreamParser : public StreamParser {
  // Valid iff it is greater than the head of the queue.
  int64 mdat_tail_;

+  // The highest end offset in the current moof. This offset is
+  // relative to |moof_head_|. This value is used to make sure we have collected
+  // enough bytes to parse all samples and aux_info in the current moof.
+  int64 highest_end_offset_;
+
  scoped_ptr<mp4::Movie> moov_;
  scoped_ptr<mp4::TrackRunIterator> runs_;


--- a/media/formats/mp4/mp4_stream_parser_unittest.cc
+++ b/media/formats/mp4/mp4_stream_parser_unittest.cc
@@ -31,7 +31,8 @@ static const char kMp4InitDataType[] = "video/mp4";
 class MP4StreamParserTest : public testing::Test {
 public:
  MP4StreamParserTest()
-      : configs_received_(false) {
+      : configs_received_(false),
+        lower_bound_(base::TimeDelta::Max()) {
    std::set<int> audio_object_types;
    audio_object_types.insert(kISO_14496_3);
    parser_.reset(new MP4StreamParser(audio_object_types, false));
@@ -40,6 +41,7 @@ class MP4StreamParserTest : public testing::Test {
 protected:
  scoped_ptr<MP4StreamParser> parser_;
  bool configs_received_;
+  base::TimeDelta lower_bound_;

  bool AppendData(const uint8* data, size_t length) {
    return parser_->Parse(data, length);
@@ -73,7 +75,6 @@ class MP4StreamParserTest : public testing::Test {
    return true;
  }

-
  void DumpBuffers(const std::string& label,
                   const StreamParser::BufferQueue& buffers) {
    DVLOG(2) << "DumpBuffers: " << label << " size " << buffers.size();
@@ -96,6 +97,24 @@ class MP4StreamParserTest : public testing::Test {
    if (!text_map.empty())
      return false;

+    // Find the second highest timestamp so that we know what the
+    // timestamps on the next set of buffers must be >= to.
+    base::TimeDelta audio = !audio_buffers.empty() ?
+        audio_buffers.back()->GetDecodeTimestamp() : kNoTimestamp();
+    base::TimeDelta video = !video_buffers.empty() ?
+        video_buffers.back()->GetDecodeTimestamp() : kNoTimestamp();
+    base::TimeDelta second_highest_timestamp =
+        (audio == kNoTimestamp() ||
+         (video != kNoTimestamp() && audio > video)) ? video : audio;
+
+    DCHECK(second_highest_timestamp != kNoTimestamp());
+
+    if (lower_bound_ != kNoTimestamp() &&
+        second_highest_timestamp < lower_bound_) {
+      return false;
+    }
+
+    lower_bound_ = second_highest_timestamp;
    return true;
  }

@@ -108,10 +127,12 @@ class MP4StreamParserTest : public testing::Test {

  void NewSegmentF() {
    DVLOG(1) << "NewSegmentF";
+    lower_bound_ = kNoTimestamp();  // No lower bound yet for this segment.
  }

  void EndOfSegmentF() {
    DVLOG(1) << "EndOfSegmentF()";
+    lower_bound_ = base::TimeDelta::Max();  // Reject buffers until NewSegmentF.
  }

  void InitializeParser() {