Mpeg2 TS parser: Es parsing using an ES byte queue.

Before this refactoring, ES management was left to each individual ES parser implementation. It has now been moved into the EsParser base class, which relies explicitly on an ES byte queue. BUG=None Review URL: https://codereview.chromium.org/497203004 Cr-Commit-Position: refs/heads/master@{#292249}

Mpeg2 TS parser: Es parsing using an ES byte queue.
Before this refactoring, ES management was left to each individual ES parser implementation. It has now been moved into the EsParser base class, which relies explicitly on an ES byte queue. BUG=None Review URL: https://codereview.chromium.org/497203004 Cr-Commit-Position: refs/heads/master@{#292249}
a6710424 · damienv · Commit bot · 5818759b · a6710424 · a6710424
Commit a6710424 authored Aug 27, 2014 by damienv Committed by Commit bot Aug 27, 2014
8 changed files
--- a/media/BUILD.gn
+++ b/media/BUILD.gn
@@ -358,6 +358,7 @@ component("media") {
    sources += [
      "formats/mp2t/es_adapter_video.cc",
      "formats/mp2t/es_adapter_video.h",
+      "formats/mp2t/es_parser.cc",
      "formats/mp2t/es_parser.h",
      "formats/mp2t/es_parser_adts.cc",
      "formats/mp2t/es_parser_adts.h",

--- a/media/formats/mp2t/es_parser.cc
+++ b/media/formats/mp2t/es_parser.cc
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/formats/mp2t/es_parser.h"
+
+#include "media/formats/common/offset_byte_queue.h"
+
+namespace media {
+namespace mp2t {
+
+// Default timing descriptor: |dts| is kNoDecodeTimestamp() and |pts| is
+// kNoTimestamp().
+EsParser::TimingDesc::TimingDesc()
+    : dts(kNoDecodeTimestamp()), pts(kNoTimestamp()) {}
+
+// Timing descriptor holding the given |dts_in| / |pts_in| pair.
+EsParser::TimingDesc::TimingDesc(DecodeTimestamp dts_in,
+                                 base::TimeDelta pts_in)
+    : dts(dts_in), pts(pts_in) {}
+
+// Allocates a fresh OffsetByteQueue to hold the incoming ES bytes.
+EsParser::EsParser() : es_queue_(new media::OffsetByteQueue()) {}
+
+// Out-of-line destructor; the body is intentionally empty.
+EsParser::~EsParser() {}
+
+// Appends |size| bytes from |buf| to the ES queue, then runs
+// ParseFromEsQueue() and returns its result.
+// When |pts| is not kNoTimestamp(), the (|dts|, |pts|) pair is recorded
+// against the queue tail as it is before the new bytes are pushed, i.e. the
+// position where |buf| starts in the ES stream.
+bool EsParser::Parse(const uint8* buf, int size,
+                     base::TimeDelta pts,
+                     DecodeTimestamp dts) {
+  DCHECK(buf);
+  DCHECK_GT(size, 0);
+
+  const bool has_pts = (pts != kNoTimestamp());
+  if (has_pts) {
+    // Anchor the incoming timing info at the current end of the byte queue.
+    const std::pair<int64, TimingDesc> anchored_desc(
+        es_queue_->tail(), TimingDesc(dts, pts));
+    timing_desc_list_.push_back(anchored_desc);
+  }
+
+  // Queue the new payload before handing control to the parser.
+  es_queue_->Push(buf, size);
+  return ParseFromEsQueue();
+}
+
+// Discards the pending timing descriptors, replaces the ES queue with a new
+// OffsetByteQueue, then calls ResetInternal().
+void EsParser::Reset() {
+  timing_desc_list_.clear();
+  es_queue_.reset(new media::OffsetByteQueue());
+  ResetInternal();
+}
+
+// Pops every descriptor anchored at or before |es_byte_count| from the front
+// of |timing_desc_list_| and returns the last one popped.
+// Returns a default-constructed TimingDesc when nothing qualifies.
+EsParser::TimingDesc EsParser::GetTimingDescriptor(int64 es_byte_count) {
+  TimingDesc result;
+  while (!timing_desc_list_.empty()) {
+    const std::pair<int64, TimingDesc>& oldest = timing_desc_list_.front();
+    if (oldest.first > es_byte_count)
+      break;
+    // Copy the descriptor out before the pop invalidates |oldest|.
+    result = oldest.second;
+    timing_desc_list_.pop_front();
+  }
+  return result;
+}
+
+}  // namespace mp2t
+}  // namespace media
--- a/media/formats/mp2t/es_parser.h
+++ b/media/formats/mp2t/es_parser.h
@@ -5,6 +5,9 @@
 #ifndef MEDIA_FORMATS_MP2T_ES_PARSER_H_
 #define MEDIA_FORMATS_MP2T_ES_PARSER_H_

+#include <list>
+#include <utility>
+
 #include "base/basictypes.h"
 #include "base/callback.h"
 #include "base/memory/ref_counted.h"
@@ -14,6 +17,7 @@

 namespace media {

+class OffsetByteQueue;
 class StreamParserBuffer;

 namespace mp2t {
@@ -22,20 +26,62 @@ class MEDIA_EXPORT EsParser {
 public:
  typedef base::Callback<void(scoped_refptr<StreamParserBuffer>)> EmitBufferCB;

-  EsParser() {}
-  virtual ~EsParser() {}
+  EsParser();
+  virtual ~EsParser();

  // ES parsing.
  // Should use kNoTimestamp when a timestamp is not valid.
-  virtual bool Parse(const uint8* buf, int size,
-                     base::TimeDelta pts,
-                     DecodeTimestamp dts) = 0;
+  bool Parse(const uint8* buf, int size,
+             base::TimeDelta pts,
+             DecodeTimestamp dts);

  // Flush any pending buffer.
  virtual void Flush() = 0;

  // Reset the state of the ES parser.
-  virtual void Reset() = 0;
+  void Reset();
+
+ protected:
+  struct TimingDesc {
+    TimingDesc();
+    TimingDesc(DecodeTimestamp dts, base::TimeDelta pts);
+
+    DecodeTimestamp dts;
+    base::TimeDelta pts;
+  };
+
+  // Parse ES data from |es_queue_|.
+  // Return true when successful.
+  virtual bool ParseFromEsQueue() = 0;
+
+  // Reset the internal state of the ES parser.
+  virtual void ResetInternal() = 0;
+
+  // Get the timing descriptor with the largest byte count that is less than
+  // or equal to |es_byte_count|.
+  // This timing descriptor and all the ones that come before (in stream order)
+  // are removed from list |timing_desc_list_|.
+  // If no timing descriptor is found, then the default TimingDesc is returned.
+  TimingDesc GetTimingDescriptor(int64 es_byte_count);
+
+  // Bytes of the ES stream that have not been emitted yet.
+  scoped_ptr<media::OffsetByteQueue> es_queue_;
+
+ private:
+  // Anchor some timing information into the ES queue.
+  // Here are two examples how this timing info is applied according to
+  // the MPEG-2 TS spec - ISO/IEC 13818:
+  // - "In the case of audio, if a PTS is present in PES packet header it shall
+  // refer to the first access unit commencing in the PES packet. An audio
+  // access unit commences in a PES packet if the first byte of the audio
+  // access unit is present in the PES packet."
+  // - "For AVC video streams conforming to one or more profiles defined
+  // in Annex A of Rec. ITU-T H.264 | ISO/IEC 14496-10 video, if a PTS is
+  // present in the PES packet header, it shall refer to the first AVC access
+  // unit that commences in this PES packet."
+  std::list<std::pair<int64, TimingDesc> > timing_desc_list_;
+
+  DISALLOW_COPY_AND_ASSIGN(EsParser);
 };

 }  // namespace mp2t

--- a/media/formats/mp2t/es_parser_adts.cc
+++ b/media/formats/mp2t/es_parser_adts.cc
@@ -118,24 +118,13 @@ EsParserAdts::EsParserAdts(
    bool sbr_in_mimetype)
  : new_audio_config_cb_(new_audio_config_cb),
    emit_buffer_cb_(emit_buffer_cb),
-    sbr_in_mimetype_(sbr_in_mimetype),
-    es_queue_(new media::OffsetByteQueue()) {
+    sbr_in_mimetype_(sbr_in_mimetype) {
 }

 EsParserAdts::~EsParserAdts() {
 }

-bool EsParserAdts::Parse(const uint8* buf, int size,
-                         base::TimeDelta pts,
-                         DecodeTimestamp dts) {
-  // The incoming PTS applies to the access unit that comes just after
-  // the beginning of |buf|.
-  if (pts != kNoTimestamp())
-    pts_list_.push_back(EsPts(es_queue_->tail(), pts));
-
-  // Copy the input data to the ES buffer.
-  es_queue_->Push(buf, size);
-
+bool EsParserAdts::ParseFromEsQueue() {
  // Look for every ADTS frame in the ES buffer.
  AdtsFrame adts_frame;
  while (LookForAdtsFrame(&adts_frame)) {
@@ -145,11 +134,10 @@ bool EsParserAdts::Parse(const uint8* buf, int size,
      return false;

    // Get the PTS & the duration of this access unit.
-    while (!pts_list_.empty() &&
-           pts_list_.front().first <= adts_frame.queue_offset) {
-      audio_timestamp_helper_->SetBaseTimestamp(pts_list_.front().second);
-      pts_list_.pop_front();
-    }
+    TimingDesc current_timing_desc =
+        GetTimingDescriptor(adts_frame.queue_offset);
+    if (current_timing_desc.pts != kNoTimestamp())
+      audio_timestamp_helper_->SetBaseTimestamp(current_timing_desc.pts);

    if (audio_timestamp_helper_->base_timestamp() == kNoTimestamp()) {
      DVLOG(1) << "Audio frame with unknown timestamp";
@@ -187,9 +175,7 @@ bool EsParserAdts::Parse(const uint8* buf, int size,
 void EsParserAdts::Flush() {
 }

-void EsParserAdts::Reset() {
-  es_queue_.reset(new media::OffsetByteQueue());
-  pts_list_.clear();
+void EsParserAdts::ResetInternal() {
  last_audio_decoder_config_ = AudioDecoderConfig();
 }


--- a/media/formats/mp2t/es_parser_adts.h
+++ b/media/formats/mp2t/es_parser_adts.h
@@ -36,19 +36,15 @@ class MEDIA_EXPORT EsParserAdts : public EsParser {
  virtual ~EsParserAdts();

  // EsParser implementation.
-  virtual bool Parse(const uint8* buf, int size,
-                     base::TimeDelta pts,
-                     DecodeTimestamp dts) OVERRIDE;
  virtual void Flush() OVERRIDE;
-  virtual void Reset() OVERRIDE;

 private:
-  // Used to link a PTS with a byte position in the ES stream.
-  typedef std::pair<int64, base::TimeDelta> EsPts;
-  typedef std::list<EsPts> EsPtsList;
-
  struct AdtsFrame;

+  // EsParser implementation.
+  virtual bool ParseFromEsQueue() OVERRIDE;
+  virtual void ResetInternal() OVERRIDE;
+
  // Synchronize the stream on an ADTS syncword (consuming bytes from
  // |es_queue_| if needed).
  // Returns true when a full ADTS frame has been found: in that case
@@ -74,12 +70,6 @@ class MEDIA_EXPORT EsParserAdts : public EsParser {
  // (mp4a.40.5 in the codecs parameter).
  bool sbr_in_mimetype_;

-  // Bytes of the ES stream that have not been emitted yet.
-  scoped_ptr<media::OffsetByteQueue> es_queue_;
-
-  // List of PTS associated with a position in the ES stream.
-  EsPtsList pts_list_;
-
  // Interpolated PTS for frames that don't have one.
  scoped_ptr<AudioTimestampHelper> audio_timestamp_helper_;


--- a/media/formats/mp2t/es_parser_h264.cc
+++ b/media/formats/mp2t/es_parser_h264.cc
@@ -28,7 +28,6 @@ EsParserH264::EsParserH264(
    const NewVideoConfigCB& new_video_config_cb,
    const EmitBufferCB& emit_buffer_cb)
    : es_adapter_(new_video_config_cb, emit_buffer_cb),
-      es_queue_(new media::OffsetByteQueue()),
      h264_parser_(new H264Parser()),
      current_access_unit_pos_(0),
      next_access_unit_pos_(0) {
@@ -37,37 +36,8 @@ EsParserH264::EsParserH264(
 EsParserH264::~EsParserH264() {
 }

-bool EsParserH264::Parse(const uint8* buf, int size,
-                         base::TimeDelta pts,
-                         DecodeTimestamp dts) {
-  // Note: Parse is invoked each time a PES packet has been reassembled.
-  // Unfortunately, a PES packet does not necessarily map
-  // to an h264 access unit, although the HLS recommendation is to use one PES
-  // for each access unit (but this is just a recommendation and some streams
-  // do not comply with this recommendation).
-
-  // HLS recommendation: "In AVC video, you should have both a DTS and a
-  // PTS in each PES header".
-  // However, some streams do not comply with this recommendation.
-  DVLOG_IF(1, pts == kNoTimestamp()) << "Each video PES should have a PTS";
-  if (pts != kNoTimestamp()) {
-    TimingDesc timing_desc;
-    timing_desc.pts = pts;
-    timing_desc.dts = (dts != kNoDecodeTimestamp()) ? dts :
-        DecodeTimestamp::FromPresentationTime(pts);
-
-    // Link the end of the byte queue with the incoming timing descriptor.
-    timing_desc_list_.push_back(
-        std::pair<int64, TimingDesc>(es_queue_->tail(), timing_desc));
-  }
-
-  // Add the incoming bytes to the ES queue.
-  es_queue_->Push(buf, size);
-  return ParseInternal();
-}
-
 void EsParserH264::Flush() {
-  DVLOG(1) << "EsParserH264::Flush";
+  DVLOG(1) << __FUNCTION__;
  if (!FindAUD(&current_access_unit_pos_))
    return;

@@ -75,18 +45,16 @@ void EsParserH264::Flush() {
  // which is assumed to be complete at this point.
  uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 };
  es_queue_->Push(aud, sizeof(aud));
-  ParseInternal();
+  ParseFromEsQueue();

  es_adapter_.Flush();
 }

-void EsParserH264::Reset() {
-  DVLOG(1) << "EsParserH264::Reset";
-  es_queue_.reset(new media::OffsetByteQueue());
+void EsParserH264::ResetInternal() {
+  DVLOG(1) << __FUNCTION__;
  h264_parser_.reset(new H264Parser());
  current_access_unit_pos_ = 0;
  next_access_unit_pos_ = 0;
-  timing_desc_list_.clear();
  last_video_decoder_config_ = VideoDecoderConfig();
  es_adapter_.Reset();
 }
@@ -123,7 +91,7 @@ bool EsParserH264::FindAUD(int64* stream_pos) {
  return true;
 }

-bool EsParserH264::ParseInternal() {
+bool EsParserH264::ParseFromEsQueue() {
  DCHECK_LE(es_queue_->head(), current_access_unit_pos_);
  DCHECK_LE(current_access_unit_pos_, next_access_unit_pos_);
  DCHECK_LE(next_access_unit_pos_, es_queue_->tail());
@@ -232,15 +200,15 @@ bool EsParserH264::ParseInternal() {
 bool EsParserH264::EmitFrame(int64 access_unit_pos, int access_unit_size,
                             bool is_key_frame, int pps_id) {
  // Get the access unit timing info.
-  TimingDesc current_timing_desc = {kNoDecodeTimestamp(), kNoTimestamp()};
-  while (!timing_desc_list_.empty() &&
-         timing_desc_list_.front().first <= access_unit_pos) {
-    current_timing_desc = timing_desc_list_.front().second;
-    timing_desc_list_.pop_front();
-  }
+  TimingDesc current_timing_desc = GetTimingDescriptor(access_unit_pos);
  if (current_timing_desc.pts == kNoTimestamp())
    return false;

+  if (current_timing_desc.dts == kNoDecodeTimestamp()) {
+    current_timing_desc.dts =
+        DecodeTimestamp::FromPresentationTime(current_timing_desc.pts);
+  }
+
  // Update the video decoder configuration if needed.
  const H264PPS* pps = h264_parser_->GetPPS(pps_id);
  if (!pps) {

--- a/media/formats/mp2t/es_parser_h264.h
+++ b/media/formats/mp2t/es_parser_h264.h
@@ -27,10 +27,14 @@ class OffsetByteQueue;
 namespace media {
 namespace mp2t {

-// Remark:
-// In this h264 parser, frame splitting is based on AUD nals.
+// A few remarks:
+// - In this h264 parser, frame splitting is based on AUD nals.
 // Mpeg2 TS spec: "2.14 Carriage of Rec. ITU-T H.264 | ISO/IEC 14496-10 video"
 // "Each AVC access unit shall contain an access unit delimiter NAL Unit;"
+// - PES packets do not necessarily map to an H264 access unit although the HLS
+// recommendation is to use one PES for each access unit. In this parser,
+// we handle the general case and do not make any assumption about the access
+// unit organization within PES packets.
 //
 class MEDIA_EXPORT EsParserH264 : public EsParser {
 public:
@@ -41,17 +45,12 @@ class MEDIA_EXPORT EsParserH264 : public EsParser {
  virtual ~EsParserH264();

  // EsParser implementation.
-  virtual bool Parse(const uint8* buf, int size,
-                     base::TimeDelta pts,
-                     DecodeTimestamp dts) OVERRIDE;
  virtual void Flush() OVERRIDE;
-  virtual void Reset() OVERRIDE;

 private:
-  struct TimingDesc {
-    DecodeTimestamp dts;
-    base::TimeDelta pts;
-  };
+  // EsParser implementation.
+  virtual bool ParseFromEsQueue() OVERRIDE;
+  virtual void ResetInternal() OVERRIDE;

  // Find the AUD located at or after |*stream_pos|.
  // Return true if an AUD is found.
@@ -60,10 +59,6 @@ class MEDIA_EXPORT EsParserH264 : public EsParser {
  // of the start code parser.
  bool FindAUD(int64* stream_pos);

-  // Resumes the H264 ES parsing.
-  // Return true if successful.
-  bool ParseInternal();
-
  // Emit a frame whose position in the ES queue starts at |access_unit_pos|.
  // Returns true if successful, false if no PTS is available for the frame.
  bool EmitFrame(int64 access_unit_pos, int access_unit_size,
@@ -75,10 +70,6 @@ class MEDIA_EXPORT EsParserH264 : public EsParser {

  EsAdapterVideo es_adapter_;

-  // Bytes of the ES stream that have not been emitted yet.
-  scoped_ptr<media::OffsetByteQueue> es_queue_;
-  std::list<std::pair<int64, TimingDesc> > timing_desc_list_;
-
  // H264 parser state.
  // - |current_access_unit_pos_| is pointing to an annexB syncword
  // representing the first NALU of an H264 access unit.

--- a/media/media.gyp
+++ b/media/media.gyp
@@ -927,6 +927,7 @@
            'filters/h264_to_annex_b_bitstream_converter.h',
            'formats/mp2t/es_adapter_video.cc',
            'formats/mp2t/es_adapter_video.h',
+            'formats/mp2t/es_parser.cc',
            'formats/mp2t/es_parser.h',
            'formats/mp2t/es_parser_adts.cc',
            'formats/mp2t/es_parser_adts.h',