Commit b4bc1f2d authored by grunell's avatar grunell Committed by Commit bot

Drop WebRTC audio data if OS has skipped frames.

This ensures that what is actually played out and what we feed to the echo canceler as far-end data get back in sync as quickly as possible after a skip, which in turn reduces the risk of echo. We drop the data in WebRtcAudioRenderer::Render.

The fifo is created (if it doesn't already exist) when the number of skipped frames does not correspond to 10 ms of data.

This CL also avoids unnecessarily creating a new fifo when only the sink's frames per buffer changes but the source's does not (in the case where they differ).

BUG=560371

Review URL: https://codereview.chromium.org/1596523005

Cr-Commit-Position: refs/heads/master@{#370686}
parent fb29092d
......@@ -39,6 +39,13 @@ namespace {
// between each callback leads to one UMA update each 100ms.
const int kNumCallbacksBetweenRenderTimeHistograms = 10;
// Audio parameters that don't change.
const media::AudioParameters::Format kFormat =
media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kChannelLayout = media::CHANNEL_LAYOUT_STEREO;
const int kChannels = 2;
const int kBitsPerSample = 16;
// This is a simple wrapper class that's handed out to users of a shared
// WebRtcAudioRenderer instance. This class maintains the per-user 'playing'
// and 'started' states to avoid problems related to incorrect usage which
......@@ -187,12 +194,7 @@ WebRtcAudioRenderer::WebRtcAudioRenderer(
play_ref_count_(0),
start_ref_count_(0),
audio_delay_milliseconds_(0),
fifo_delay_milliseconds_(0),
sink_params_(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
media::CHANNEL_LAYOUT_STEREO,
0,
16,
0),
sink_params_(kFormat, kChannelLayout, 0, kBitsPerSample, 0),
output_device_id_(device_id),
security_origin_(security_origin),
render_callback_count_(0) {
......@@ -434,6 +436,28 @@ int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
DCHECK_LE(audio_delay_milliseconds, static_cast<uint32_t>(INT_MAX));
audio_delay_milliseconds_ = static_cast<int>(audio_delay_milliseconds);
// If there are skipped frames, pull and throw away the same amount. We always
// pull 10 ms of data from the source (see PrepareSink()), so the fifo is only
// required if the number of frames to drop doesn't correspond to 10 ms.
if (frames_skipped > 0) {
const uint32_t source_frames_per_buffer =
static_cast<uint32_t>(sink_params_.sample_rate() / 100);
if (!audio_fifo_ && frames_skipped != source_frames_per_buffer) {
audio_fifo_.reset(new media::AudioPullFifo(
kChannels, source_frames_per_buffer,
base::Bind(&WebRtcAudioRenderer::SourceCallback,
base::Unretained(this))));
}
scoped_ptr<media::AudioBus> drop_bus =
media::AudioBus::Create(audio_bus->channels(), frames_skipped);
if (audio_fifo_)
audio_fifo_->Consume(drop_bus.get(), drop_bus->frames());
else
SourceCallback(0, drop_bus.get());
}
// Pull the data we will deliver.
if (audio_fifo_)
audio_fifo_->Consume(audio_bus, audio_bus->frames());
else
......@@ -457,7 +481,9 @@ void WebRtcAudioRenderer::SourceCallback(
<< audio_bus->frames() << ")";
int output_delay_milliseconds = audio_delay_milliseconds_;
output_delay_milliseconds += fifo_delay_milliseconds_;
output_delay_milliseconds += fifo_frame_delay *
base::Time::kMillisecondsPerSecond /
sink_params_.sample_rate();
DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds;
// We need to keep render data for the |source_| regardless of |state_|,
......@@ -606,55 +632,38 @@ void WebRtcAudioRenderer::PrepareSink() {
UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputSampleRateUnexpected", sample_rate);
}
// Set up audio parameters for the source, i.e., the WebRTC client.
// Calculate the frames per buffer for the source, i.e. the WebRTC client. We
// use 10 ms of data since the WebRTC client only supports multiples of 10 ms
// as buffer size where 10 ms is preferred for lowest possible delay.
const int source_frames_per_buffer = (sample_rate / 100);
DVLOG(1) << "Using WebRTC output buffer size: " << source_frames_per_buffer;
// The WebRTC client only supports multiples of 10ms as buffer size where
// 10ms is preferred for lowest possible delay.
const int frames_per_10ms = (sample_rate / 100);
DVLOG(1) << "Using WebRTC output buffer size: " << frames_per_10ms;
media::AudioParameters source_params(
media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
new_sink_params.channel_layout(), sample_rate, 16, frames_per_10ms);
source_params.set_channels_for_discrete(new_sink_params.channels());
const int frames_per_buffer = GetOptimalBufferSize(
// Setup sink parameters.
const int sink_frames_per_buffer = GetOptimalBufferSize(
sample_rate, sink_->GetOutputParameters().frames_per_buffer());
new_sink_params.Reset(
new_sink_params.format(), new_sink_params.channel_layout(),
sample_rate, 16, frames_per_buffer);
new_sink_params.set_sample_rate(sample_rate);
new_sink_params.set_frames_per_buffer(sink_frames_per_buffer);
// Create a FIFO if re-buffering is required to match the source input with
// the sink request. The source acts as provider here and the sink as
// consumer.
int new_fifo_delay_milliseconds = 0;
scoped_ptr<media::AudioPullFifo> new_audio_fifo;
if (source_params.frames_per_buffer() !=
new_sink_params.frames_per_buffer()) {
DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
<< " to " << new_sink_params.frames_per_buffer();
new_audio_fifo.reset(new media::AudioPullFifo(
source_params.channels(), source_params.frames_per_buffer(),
base::Bind(&WebRtcAudioRenderer::SourceCallback,
base::Unretained(this))));
if (new_sink_params.frames_per_buffer() >
source_params.frames_per_buffer()) {
int frame_duration_milliseconds =
base::Time::kMillisecondsPerSecond /
static_cast<double>(source_params.sample_rate());
new_fifo_delay_milliseconds = (new_sink_params.frames_per_buffer() -
source_params.frames_per_buffer()) *
frame_duration_milliseconds;
}
const bool different_source_sink_frames =
source_frames_per_buffer != new_sink_params.frames_per_buffer();
if (different_source_sink_frames) {
DVLOG(1) << "Rebuffering from " << source_frames_per_buffer << " to "
<< new_sink_params.frames_per_buffer();
}
{
base::AutoLock lock(lock_);
if ((!audio_fifo_ && different_source_sink_frames) ||
(audio_fifo_ &&
audio_fifo_->SizeInFrames() != source_frames_per_buffer)) {
audio_fifo_.reset(new media::AudioPullFifo(
kChannels, source_frames_per_buffer,
base::Bind(&WebRtcAudioRenderer::SourceCallback,
base::Unretained(this))));
}
sink_params_ = new_sink_params;
fifo_delay_milliseconds_ = new_fifo_delay_milliseconds;
if (new_audio_fifo.get())
audio_fifo_ = std::move(new_audio_fifo);
}
sink_->Initialize(new_sink_params, this);
......
......@@ -203,8 +203,8 @@ class CONTENT_EXPORT WebRtcAudioRenderer
void OnPlayStateChanged(const blink::WebMediaStream& media_stream,
PlayingState* state);
// Updates |sink_params_|, |audio_fifo_| and |fifo_delay_milliseconds_| based
// on |sink_|, and initializes |sink_|.
// Updates |sink_params_| and |audio_fifo_| based on |sink_|, and initializes
// |sink_|.
void PrepareSink();
// The RenderFrame in which the audio is rendered into |sink_|.
......@@ -241,9 +241,6 @@ class CONTENT_EXPORT WebRtcAudioRenderer
// AEC.
int audio_delay_milliseconds_;
// Delay due to the FIFO in milliseconds.
int fifo_delay_milliseconds_;
base::TimeDelta current_time_;
// Saved volume and playing state of the root renderer.
......
......@@ -46,6 +46,10 @@ void AudioPullFifo::Consume(AudioBus* destination, int frames_to_consume) {
// Empties the FIFO without deallocating any memory.
// NOTE(review): presumably an index equal to fifo_->frames() denotes
// "no buffered data available" — confirm against ReadFromFifo.
void AudioPullFifo::Clear() {
  fifo_index_ = fifo_->frames();
}
// Returns the size of the FIFO in frames (the capacity of the backing
// AudioBus, not the number of frames currently buffered).
int AudioPullFifo::SizeInFrames() const {
  return fifo_->frames();
}
int AudioPullFifo::ReadFromFifo(AudioBus* destination,
int frames_to_provide,
int write_pos) {
......
......@@ -41,6 +41,9 @@ class MEDIA_EXPORT AudioPullFifo {
// Empties the FIFO without deallocating any memory.
void Clear();
// Returns the size of the fifo in number of frames.
int SizeInFrames() const;
private:
// Attempt to fulfill the request using what is available in the FIFO.
// Append new data to the |destination| starting at |write_pos|.
......
......@@ -28,11 +28,15 @@ class AudioPullFifoTest
: public testing::TestWithParam<int> {
public:
AudioPullFifoTest()
: pull_fifo_(kChannels, kMaxFramesInFifo, base::Bind(
&AudioPullFifoTest::ProvideInput, base::Unretained(this))),
audio_bus_(AudioBus::Create(kChannels, kMaxFramesInFifo)),
fill_value_(0),
last_frame_delay_(-1) {}
: pull_fifo_(kChannels,
kMaxFramesInFifo,
base::Bind(&AudioPullFifoTest::ProvideInput,
base::Unretained(this))),
audio_bus_(AudioBus::Create(kChannels, kMaxFramesInFifo)),
fill_value_(0),
last_frame_delay_(-1) {
EXPECT_EQ(kMaxFramesInFifo, pull_fifo_.SizeInFrames());
}
virtual ~AudioPullFifoTest() {}
void VerifyValue(const float data[], int size, float start_value) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment