[MSE][WebCodecs] WSBI::AppendChunks through parser, plus e2e test

This change: 1) Implements WebSourceBufferImpl::AppendChunks(), plumbing the appended chunks through ChunkDemuxer into the stream parser's ProcessChunks(). 2) Implements WebCodecsEncodedChunkStreamParser::ProcessChunks(), emitting cached config when necessary (prior to calling the NewBuffersCB, initially, or if a flush() operation had just happened before the chunks were appended). 3) Aligns as much as possible with existing architecture for similar handling by regular bytestream parsers, for improved reuse and maintainability. 4) Adds a basic end-to-end vp9 video chunk buffering and playback test. Later refinements will add more tests and may improve usability with things like EncodedAudioChunk duration, EncodedVideoChunk decodeTimestamp, and h264 support (where applicable by build flags). BUG=1144908 Change-Id: Ib787d22b0171826fa28bdbb776410e02e25a9cba Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2575501 Commit-Queue: Matthew Wolenetz <wolenetz@chromium.org> Reviewed-by: Dan Sanders <sandersd@chromium.org> Cr-Commit-Position: refs/heads/master@{#836204}

[MSE][WebCodecs] WSBI::AppendChunks through parser, plus e2e test
This change: 1) Implements WebSourceBufferImpl::AppendChunks(), plumbing the appended chunks through ChunkDemuxer into the stream parser's ProcessChunks(). 2) Implements WebCodecsEncodedChunkStreamParser::ProcessChunks(), emitting cached config when necessary (prior to calling the NewBuffersCB, initially, or if a flush() operation had just happened before the chunks were appended). 3) Aligns as much as possible with existing architecture for similar handling by regular bytestream parsers, for improved reuse and maintainability. 4) Adds a basic end-to-end vp9 video chunk buffering and playback test. Later refinements will add more tests and may improve usability with things like EncodedAudioChunk duration, EncodedVideoChunk decodeTimestamp, and h264 support (where applicable by build flags). BUG=1144908 Change-Id: Ib787d22b0171826fa28bdbb776410e02e25a9cba Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2575501 Commit-Queue: Matthew Wolenetz <wolenetz@chromium.org> Reviewed-by: Dan Sanders <sandersd@chromium.org> Cr-Commit-Position: refs/heads/master@{#836204}
fb340a76 · Matt Wolenetz · Chromium LUCI CQ · 957686f8 · fb340a76 · fb340a76
Commit fb340a76 authored Dec 11, 2020 by Matt Wolenetz Committed by Chromium LUCI CQ Dec 11, 2020
9 changed files
--- a/media/blink/websourcebuffer_impl.cc
+++ b/media/blink/websourcebuffer_impl.cc
@@ -149,10 +149,18 @@ bool WebSourceBufferImpl::Append(const unsigned char* data,
 bool WebSourceBufferImpl::AppendChunks(
    std::unique_ptr<media::StreamParser::BufferQueue> buffer_queue,
    double* timestamp_offset) {
-  // TODO(crbug.com/1144908): Continue MSE-for-WebCodecs encoded chunk buffering
-  // implementation from here through ChunkDemuxer/SourceBufferState/etc.
-  NOTIMPLEMENTED();
-  return false;
+  base::TimeDelta old_offset = timestamp_offset_;
+  bool success =
+      demuxer_->AppendChunks(id_, std::move(buffer_queue), append_window_start_,
+                             append_window_end_, &timestamp_offset_);
+
+  // Like in ::Append, coded frame processing may have updated
+  // |timestamp_offset_|; report it in seconds only when it actually changed.
+  // TODO(crbug.com/1144908): Consider refactoring this common bit into helper.
+  if (timestamp_offset && old_offset != timestamp_offset_)
+    *timestamp_offset = timestamp_offset_.InSecondsF();
+
+  return success;
 }

 void WebSourceBufferImpl::ResetParserState() {

--- a/media/filters/chunk_demuxer.cc
+++ b/media/filters/chunk_demuxer.cc
@@ -964,6 +964,66 @@ bool ChunkDemuxer::AppendData(const std::string& id,
  return true;
 }

+bool ChunkDemuxer::AppendChunks(
+    const std::string& id,
+    std::unique_ptr<StreamParser::BufferQueue> buffer_queue,
+    base::TimeDelta append_window_start,
+    base::TimeDelta append_window_end,
+    base::TimeDelta* timestamp_offset) {
+  DCHECK(buffer_queue);
+  DVLOG(1) << __func__ << ": " << id
+           << ", buffer_queue size()=" << buffer_queue->size();
+
+  DCHECK(!id.empty());
+  DCHECK(timestamp_offset);
+
+  Ranges<TimeDelta> ranges;
+
+  {
+    base::AutoLock auto_lock(lock_);
+    DCHECK_NE(state_, ENDED);
+
+    // Capture if any of the SourceBuffers are waiting for data before we start
+    // buffering new chunks.
+    bool old_waiting_for_data = IsSeekWaitingForData_Locked();
+
+    if (buffer_queue->size() == 0u)
+      return true;  // Empty append: nothing to buffer, and not an error.
+
+    switch (state_) {
+      case INITIALIZING:
+      case INITIALIZED:
+        DCHECK(IsValidId(id));
+        if (!source_state_map_[id]->AppendChunks(
+                std::move(buffer_queue), append_window_start, append_window_end,
+                timestamp_offset)) {
+          ReportError_Locked(CHUNK_DEMUXER_ERROR_APPEND_FAILED);
+          return false;
+        }
+        break;
+
+      case PARSE_ERROR:
+      case WAITING_FOR_INIT:
+      case ENDED:
+      case SHUTDOWN:
+        DVLOG(1) << "AppendChunks(): called in unexpected state " << state_;
+        return false;
+    }
+
+    // Check to see if data was appended at the pending seek point. This
+    // indicates we have parsed enough data to complete the seek. Work is still
+    // in progress at this point, but it's okay since |seek_cb_| will post.
+    if (old_waiting_for_data && !IsSeekWaitingForData_Locked() && seek_cb_)
+      RunSeekCB_Locked(PIPELINE_OK);
+
+    ranges = GetBufferedRanges_Locked();  // Snapshot under |lock_|.
+  }
+
+  host_->OnBufferedTimeRangesChanged(ranges);  // Called after the lock scope.
+  progress_cb_.Run();
+  return true;
+}
+
 void ChunkDemuxer::ResetParserState(const std::string& id,
                                    TimeDelta append_window_start,
                                    TimeDelta append_window_end,
@@ -1358,7 +1418,7 @@ ChunkDemuxerStream* ChunkDemuxer::CreateDemuxerStream(
    DemuxerStream::Type type) {
  // New ChunkDemuxerStreams can be created only during initialization segment
  // processing, which happens when a new chunk of data is appended and the
-  // lock_ must be held by ChunkDemuxer::AppendData.
+  // lock_ must be held by ChunkDemuxer::AppendData/Chunks.
  lock_.AssertAcquired();

  MediaTrack::Id media_track_id = GenerateMediaTrackId();

--- a/media/filters/chunk_demuxer.h
+++ b/media/filters/chunk_demuxer.h
@@ -202,7 +202,7 @@ class MEDIA_EXPORT ChunkDemuxer : public Demuxer {
  };

  // |open_cb| Run when Initialize() is called to signal that the demuxer
-  //   is ready to receive media data via AppendData().
+  //   is ready to receive media data via AppendData/Chunks().
  // |progress_cb| Run each time data is appended.
  // |encrypted_media_init_data_cb| Run when the demuxer determines that an
  //   encryption key is needed to decrypt the content.
@@ -236,7 +236,7 @@ class MEDIA_EXPORT ChunkDemuxer : public Demuxer {
  void StartWaitingForSeek(base::TimeDelta seek_time) override;
  void CancelPendingSeek(base::TimeDelta seek_time) override;

-  // Registers a new |id| to use for AppendData() calls. |content_type|
+  // Registers a new |id| to use for AppendData/Chunks() calls. |content_type|
  // indicates the MIME type's ContentType and |codecs| indicates the MIME
  // type's "codecs" parameter string (if any) for the data that we intend to
  // append for this ID.  kOk is returned if the demuxer has enough resources to
@@ -297,6 +297,16 @@ class MEDIA_EXPORT ChunkDemuxer : public Demuxer {
                  base::TimeDelta append_window_end,
                  base::TimeDelta* timestamp_offset);

+  // Appends webcodecs encoded chunks (already converted by caller into a
+  // BufferQueue of StreamParserBuffers) to the source buffer associated with
+  // |id|, with same semantic for other parameters and return value as
+  // AppendData().
+  bool AppendChunks(const std::string& id,
+                    std::unique_ptr<StreamParser::BufferQueue> buffer_queue,
+                    base::TimeDelta append_window_start,
+                    base::TimeDelta append_window_end,
+                    base::TimeDelta* timestamp_offset);
+
  // Aborts parsing the current segment and reset the parser to a state where
  // it can accept a new segment.
  // Some pending frames can be emitted during that process. These frames are
@@ -518,7 +528,7 @@ class MEDIA_EXPORT ChunkDemuxer : public Demuxer {
  base::TimeDelta duration_;

  // The duration passed to the last SetDuration(). If
-  // SetDuration() is never called or an AppendData() call or
+  // SetDuration() is never called or an AppendData/Chunks() call or
  // a EndOfStream() call changes |duration_|, then this
  // variable is set to < 0 to indicate that the |duration_| represents
  // the actual duration instead of a user specified value.

--- a/media/filters/source_buffer_state.cc
+++ b/media/filters/source_buffer_state.cc
@@ -214,8 +214,8 @@ bool SourceBufferState::Append(const uint8_t* data,
  append_window_end_during_append_ = append_window_end;
  timestamp_offset_during_append_ = timestamp_offset;

-  // TODO(wolenetz/acolwell): Curry and pass a NewBuffersCB here bound with
-  // append window and timestamp offset pointer. See http://crbug.com/351454.
+  // TODO(wolenetz): Curry and pass a NewBuffersCB here bound with append window
+  // and timestamp offset pointer. See http://crbug.com/351454.
  bool result = stream_parser_->Parse(data, length);
  if (!result) {
    MEDIA_LOG(ERROR, media_log_)
@@ -229,6 +229,31 @@ bool SourceBufferState::Append(const uint8_t* data,
  return result;
 }

+bool SourceBufferState::AppendChunks(
+    std::unique_ptr<StreamParser::BufferQueue> buffer_queue,
+    TimeDelta append_window_start,
+    TimeDelta append_window_end,
+    TimeDelta* timestamp_offset) {
+  append_in_progress_ = true;
+  DCHECK(timestamp_offset);
+  DCHECK(!timestamp_offset_during_append_);  // Must not already be set.
+  append_window_start_during_append_ = append_window_start;
+  append_window_end_during_append_ = append_window_end;
+  timestamp_offset_during_append_ = timestamp_offset;
+
+  // TODO(wolenetz): Curry and pass a NewBuffersCB here bound with append window
+  // and timestamp offset pointer. See http://crbug.com/351454.
+  bool result = stream_parser_->ProcessChunks(std::move(buffer_queue));
+  if (!result) {
+    MEDIA_LOG(ERROR, media_log_)
+        << __func__ << ": Processing encoded chunks for buffering failed.";
+  }
+
+  timestamp_offset_during_append_ = nullptr;  // Cleared once parsing returns.
+  append_in_progress_ = false;
+  return result;
+}
+
 void SourceBufferState::ResetParserState(TimeDelta append_window_start,
                                         TimeDelta append_window_end,
                                         base::TimeDelta* timestamp_offset) {

--- a/media/filters/source_buffer_state.h
+++ b/media/filters/source_buffer_state.h
@@ -62,11 +62,16 @@ class MEDIA_EXPORT SourceBufferState {
  // append. |append_window_start| and |append_window_end| correspond to the MSE
  // spec's similarly named source buffer attributes that are used in coded
  // frame processing.
+  // AppendChunks appends the provided BufferQueue.
  bool Append(const uint8_t* data,
              size_t length,
              TimeDelta append_window_start,
              TimeDelta append_window_end,
              TimeDelta* timestamp_offset);
+  bool AppendChunks(std::unique_ptr<StreamParser::BufferQueue> buffer_queue,
+                    TimeDelta append_window_start,
+                    TimeDelta append_window_end,
+                    TimeDelta* timestamp_offset);

  // Aborts the current append sequence and resets the parser.
  void ResetParserState(TimeDelta append_window_start,

--- a/media/formats/webcodecs/webcodecs_encoded_chunk_stream_parser.cc
+++ b/media/formats/webcodecs/webcodecs_encoded_chunk_stream_parser.cc
@@ -14,6 +14,18 @@
 #include "media/base/media_tracks.h"
 #include "media/base/stream_parser_buffer.h"
 #include "media/base/text_track_config.h"
+#include "media/base/timestamp_constants.h"
+
+namespace {
+
+// TODO(crbug.com/1144908): Since these must be identical to those generated
+// in the SourceBuffer, consider moving these to a shared header (possibly
+// stream_parser.h). Meanwhile, they must be kept in sync manually with the
+// similar constexpr values in SourceBuffer.
+constexpr media::StreamParser::TrackId kWebCodecsAudioTrackId = 1;
+constexpr media::StreamParser::TrackId kWebCodecsVideoTrackId = 2;
+
+}  // namespace

 namespace media {

@@ -70,6 +82,11 @@ bool WebCodecsEncodedChunkStreamParser::GetGenerateTimestampsFlag() const {

 bool WebCodecsEncodedChunkStreamParser::Parse(const uint8_t* /* buf */,
                                              int /* size */) {
+  // TODO(crbug.com/1144908): Protect against app reaching this (and similar
+  // inverse case in other parsers) simply by using the wrong append method on
+  // the SourceBuffer. Maybe a better MEDIA_LOG here would be sufficient?  Or
+  // instead have the top-level SourceBuffer throw synchronous exception when
+  // attempting the wrong append method, without causing parse/decode error?
  NOTREACHED();  // ProcessChunks() is the method to use instead for this
                 // parser.
  return false;
@@ -77,9 +94,80 @@ bool WebCodecsEncodedChunkStreamParser::Parse(const uint8_t* /* buf */,

 bool WebCodecsEncodedChunkStreamParser::ProcessChunks(
    std::unique_ptr<BufferQueue> buffer_queue) {
-  // TODO(crbug.com/1144908): Implement.
-  NOTIMPLEMENTED();
-  return false;
+  // Forwards the caller-provided encoded chunks to |new_buffers_cb_| as one
+  // segment, first emitting the cached decoder config if the parser is still
+  // waiting to emit it. Returns false if the parser is already in kError, if
+  // config emission fails, if any chunk's type does not match the config, or
+  // if |new_buffers_cb_| fails.
+  DCHECK_NE(state_, kWaitingForInit);
+
+  if (state_ == kError)
+    return false;
+
+  if (state_ == kWaitingForConfigEmission) {
+    // Must (still) have only one config. We'll retain ownership.
+    // MediaTracks::AddAudio/VideoTrack copies the config.
+    DCHECK((audio_config_ && !video_config_) ||
+           (video_config_ && !audio_config_));
+    auto media_tracks = std::make_unique<MediaTracks>();
+    if (audio_config_) {
+      media_tracks->AddAudioTrack(
+          *audio_config_, kWebCodecsAudioTrackId, MediaTrack::Kind("main"),
+          MediaTrack::Label(""), MediaTrack::Language(""));
+    } else if (video_config_) {
+      media_tracks->AddVideoTrack(
+          *video_config_, kWebCodecsVideoTrackId, MediaTrack::Kind("main"),
+          MediaTrack::Label(""), MediaTrack::Language(""));
+    }
+
+    if (!config_cb_.Run(std::move(media_tracks), TextTrackConfigMap())) {
+      ChangeState(kError);
+      return false;
+    }
+
+    if (init_cb_) {
+      InitParameters params(kInfiniteDuration);
+      params.liveness = DemuxerStream::LIVENESS_UNKNOWN;
+      if (audio_config_)
+        params.detected_audio_track_count = 1;
+      if (video_config_)
+        params.detected_video_track_count = 1;
+      params.detected_text_track_count = 0;
+      std::move(init_cb_).Run(params);
+    }
+
+    ChangeState(kWaitingForEncodedChunks);
+  }
+
+  DCHECK_EQ(state_, kWaitingForEncodedChunks);
+
+  // All of |buffer_queue| must be of the media type (audio or video)
+  // corresponding to the exactly one type of decoder config we have. Otherwise,
+  // the caller has provided encoded chunks for the wrong kind of config.
+  DemuxerStream::Type expected_type =
+      audio_config_ ? DemuxerStream::AUDIO : DemuxerStream::VIDEO;
+  for (const auto& it : *buffer_queue) {
+    if (it->type() != expected_type) {
+      MEDIA_LOG(ERROR, media_log_)
+          << "Incorrect EncodedChunk type (audio vs video) appended";
+      ChangeState(kError);
+      return false;
+    }
+  }
+
+  // TODO(crbug.com/1144908): Add a different new_buffers_cb type for us to use
+  // so that we can just std::move the buffer_queue, and avoid potential issues
+  // with out-of-order timestamps in the caller-provided queue that would
+  // otherwise cause parse failure in MergeBufferQueues with the current, legacy
+  // style of new_buffers_cb that depends on parsers to emit sanely time-ordered
+  // groups of frames from *muxed* multi-track bytestreams. FrameProcessor is
+  // capable of handling our buffer_queue verbatim.
+  BufferQueueMap buffers;
+  // This call owns |buffer_queue| and does not read it again below, so its
+  // contents are moved, not copied, into the single-track map.
+  if (audio_config_)
+    buffers.emplace(kWebCodecsAudioTrackId, std::move(*buffer_queue));
+  else
+    buffers.emplace(kWebCodecsVideoTrackId, std::move(*buffer_queue));
+  new_segment_cb_.Run();
+  if (!new_buffers_cb_.Run(buffers))
+    return false;
+  end_of_segment_cb_.Run();
+
+  return true;
 }

 void WebCodecsEncodedChunkStreamParser::ChangeState(State new_state) {

--- a/third_party/blink/renderer/modules/mediasource/source_buffer.cc
+++ b/third_party/blink/renderer/modules/mediasource/source_buffer.cc
@@ -124,6 +124,10 @@ WTF::String WebTimeRangesToString(const WebTimeRanges& ranges) {
 // TrackDefaults makes a return to MSE spec, so that apps can provide
 // name/label/kind/etc metadata for tracks originating from appended WebCodecs
 // chunks.
+// TODO(crbug.com/1144908): Since these must be identical to those generated
+// in the underlying WebCodecsEncodedChunkStreamParser, consider moving these
+// to possibly stream_parser.h. Meanwhile, must be kept in sync with similar
+// constexpr in that parser manually.
 constexpr media::StreamParser::TrackId kWebCodecsAudioTrackId = 1;
 constexpr media::StreamParser::TrackId kWebCodecsVideoTrackId = 2;


--- a/third_party/blink/web_tests/external/wpt/media-source/mse-for-webcodecs/tentative/mediasource-webcodecs-appendencodedchunks-play.html
+++ b/third_party/blink/web_tests/external/wpt/media-source/mse-for-webcodecs/tentative/mediasource-webcodecs-appendencodedchunks-play.html
+<!DOCTYPE html>
+<html>
+  <title>Test basic encoded chunk buffering and playback with MediaSource</title>
+<script src="/resources/testharness.js"></script>
+<script src="/resources/testharnessreport.js"></script>
+<script>
+
+setup(() => {  // Feature-detect MSE-for-WebCodecs support.
+  assert_implements(
+      SourceBuffer.prototype.hasOwnProperty('appendEncodedChunks'),
+      'SourceBuffer prototype hasOwnProperty "appendEncodedChunks", used ' +
+          'here to feature detect MSE-for-WebCodecs implementation.');
+});
+
+// TODO(crbug.com/1144908): Consider extracting metadata into helper library
+// shared with webcodecs tests. This metadata is adapted from webcodecs/video-decoder-any.js.
+let vp9 = {
+  async buffer() { return (await fetch('vp9.mp4')).arrayBuffer(); },
+  // Note, file might not actually be level 1. See original metadata in webcodecs test suite.
+  codec: "vp09.00.10.08",
+  frames: [{offset: 44, size: 3315, type: 'key'},  // Byte offset/size of each chunk's data within buffer().
+           {offset: 3359, size: 203, type: 'delta'},
+           {offset: 3562, size: 245, type: 'delta'},
+           {offset: 3807, size: 172, type: 'delta'},
+           {offset: 3979, size: 312, type: 'delta'},
+           {offset: 4291, size: 170, type: 'delta'},
+           {offset: 4461, size: 195, type: 'delta'},
+           {offset: 4656, size: 181, type: 'delta'},
+           {offset: 4837, size: 356, type: 'delta'},
+           {offset: 5193, size: 159, type: 'delta'}]
+};
+
+async function getOpenMediaSource(t) {
+  return new Promise(async resolve => {
+    const v = document.createElement('video');
+    document.body.appendChild(v);
+    const mediaSource = new MediaSource();
+    const url = URL.createObjectURL(mediaSource);
+    mediaSource.addEventListener('sourceopen', t.step_func(() => {
+      URL.revokeObjectURL(url);
+      assert_equals(mediaSource.readyState, 'open', 'MediaSource is open');
+      resolve([ v, mediaSource ]);
+    }), { once: true });
+    v.src = url;
+  });
+}
+
+promise_test(async t => {
+  let buffer = await vp9.buffer();
+  let [ videoElement, mediaSource ] = await getOpenMediaSource(t);
+  videoElement.controls = true;  // Makes early prototype demo playback easier to control manually.
+  let sourceBuffer = mediaSource.addSourceBuffer({ videoConfig: { codec: vp9.codec } });
+  let next_timestamp = 0;
+  let frame_duration = 100 * 1000;  // 100 milliseconds
+  // forEach with async callbacks makes it too easy to have uncaught rejections
+  // that don't fail this promise_test or even emit harness error.
+  // Iterating explicitly instead.
+  for (i = 0; i < vp9.frames.length; i++, next_timestamp += frame_duration) {
+    let frame_metadata = vp9.frames[i];
+    await sourceBuffer.appendEncodedChunks(new EncodedVideoChunk( {
+      type: frame_metadata.type,
+      timestamp: next_timestamp,
+      duration: frame_duration,
+      data: new Uint8Array(buffer, frame_metadata.offset, frame_metadata.size)
+    }));
+  }
+
+  mediaSource.endOfStream();
+
+  return new Promise( (resolve, reject) => {
+    videoElement.onended = resolve;
+    videoElement.onerror = reject;
+    videoElement.play();
+  });
+
+}, "Buffer EncodedVideoChunks (VP9) one-by-one and play them with MSE");
+
+// TODO(crbug.com/1144908): More exhaustive tests (multiple sourcebuffers,
+// varying append patterns, invalid append patterns; eventually more codecs,
+// out-of-order DTS, durations, etc.)
+
+</script>
+</html>
--- a/third_party/blink/web_tests/external/wpt/media-source/mse-for-webcodecs/tentative/vp9.mp4
+++ b/third_party/blink/web_tests/external/wpt/media-source/mse-for-webcodecs/tentative/vp9.mp4