Introduce a queue of audio buffers for tts audio playback

R=dmazzoni@chromium.org
Test: run through all Tts Debug tests; manually using Eve/Hana; word callbacks; continuous read on large blocks; trigger lots of interruptions; start/end callback testing.
Change-Id: I7ebc43620adbd5adc9de23ead8af12bde9f4bf14
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2465564
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Commit-Queue: David Tseng <dtseng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#816194}

Introduce a queue of audio buffers for tts audio playback
R=dmazzoni@chromium.org
Test: run through all Tts Debug tests; manually using Eve/Hana; word callbacks; continuous read on large blocks; trigger lots of interruptions; start/end callback testing.
Change-Id: I7ebc43620adbd5adc9de23ead8af12bde9f4bf14
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2465564
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Commit-Queue: David Tseng <dtseng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#816194}
5c7ae321 · David Tseng · Commit Bot · 7e150eef · 5c7ae321 · 5c7ae321
Commit 5c7ae321 authored Oct 12, 2020 by David Tseng Committed by Commit Bot Oct 12, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 115 additions and 82 deletions

chromeos/services/tts/tts_service.cc +90 -68

chromeos/services/tts/tts_service.h +25 -14

No files found.
--- a/chromeos/services/tts/tts_service.cc
+++ b/chromeos/services/tts/tts_service.cc
@@ -41,9 +41,7 @@ void HandleLibraryLogging(int severity, const char* message) {
 // methods utilize C features only.

 TtsService::TtsService(mojo::PendingReceiver<mojom::TtsService> receiver)
-    : service_receiver_(this, std::move(receiver)),
-      stream_receiver_(this),
-      got_first_buffer_(false) {
+    : service_receiver_(this, std::move(receiver)), stream_receiver_(this) {
  if (setpriority(PRIO_PROCESS, 0, -10 /* real time audio */) != 0) {
    PLOG(ERROR) << "Unable to request real time priority; performance will be "
                   "impacted.";
@@ -62,7 +60,6 @@ TtsService::~TtsService() = default;
 void TtsService::BindTtsStream(
    mojo::PendingReceiver<mojom::TtsStream> receiver,
    mojo::PendingRemote<audio::mojom::StreamFactory> factory) {
-  base::AutoLock al(state_lock_);
  stream_receiver_.Bind(std::move(receiver));

  // TODO(accessibility): The sample rate below can change based on the audio
@@ -79,8 +76,6 @@ void TtsService::BindTtsStream(
 void TtsService::InstallVoice(const std::string& voice_name,
                              const std::vector<uint8_t>& voice_bytes,
                              InstallVoiceCallback callback) {
-  base::AutoLock al(state_lock_);
-
  // Create a directory to place extracted voice data.
  base::FilePath voice_data_path(kTempDataDirectory);
  voice_data_path = voice_data_path.Append(voice_name);
@@ -101,8 +96,6 @@ void TtsService::InstallVoice(const std::string& voice_name,

 void TtsService::SelectVoice(const std::string& voice_name,
                             SelectVoiceCallback callback) {
-  base::AutoLock al(state_lock_);
-
  base::FilePath path_prefix =
      base::FilePath(kTempDataDirectory).Append(voice_name);
  base::FilePath pipeline_path = path_prefix.Append("pipeline");
@@ -112,8 +105,6 @@ void TtsService::SelectVoice(const std::string& voice_name,

 void TtsService::Speak(const std::vector<uint8_t>& text_jspb,
                       SpeakCallback callback) {
-  base::AutoLock al(state_lock_);
-
  tts_event_observer_.reset();
  auto pending_receiver = tts_event_observer_.BindNewPipeAndPassReceiver();
  std::move(callback).Run(std::move(pending_receiver));
@@ -125,16 +116,13 @@ void TtsService::Speak(const std::vector<uint8_t>& text_jspb,
    return;
  }

-  // For lower end devices, pre-fetching the first buffer on the main thread is
-  // important. Not doing so can cause us to not respond quickly enough in the
-  // audio rendering thread/callback below.
-  size_t frames = 0;
-  first_buf_.first.clear();
-  first_buf_.first.resize(libchrometts_.GoogleTtsGetFramesInAudioBuffer());
-  first_buf_.second =
-      libchrometts_.GoogleTtsReadBuffered(&first_buf_.first[0], &frames);
-
  output_device_->Play();
+
+  is_playing_ = true;
+  base::ThreadTaskRunnerHandle::Get()->PostTask(
+      FROM_HERE,
+      base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
+                     true /* is_first_buffer */));
 }

 void TtsService::Stop() {
@@ -143,7 +131,6 @@ void TtsService::Stop() {
 }

 void TtsService::SetVolume(float volume) {
-  base::AutoLock al(state_lock_);
  output_device_->SetVolume(volume);
 }

@@ -151,69 +138,104 @@ int TtsService::Render(base::TimeDelta delay,
                       base::TimeTicks delay_timestamp,
                       int prior_frames_skipped,
                       media::AudioBus* dest) {
-  // Careful to not block the render callback. Only try to acquire the lock
-  // here, but early return if we are processing a series of other calls. This
-  // can be extremely important if there's a long queue of pending Speak/Stop
-  // pairs being processed on the main thread. This can occur if the tts api
-  // receives lots of tts requests.
-  if (!state_lock_.Try())
-    return 0;
-
-  size_t frames = 0;
-  float* channel = dest->channel(0);
+  size_t frames_in_buf = 0;
  int32_t status = -1;
-  if (got_first_buffer_) {
-    status = libchrometts_.GoogleTtsReadBuffered(channel, &frames);
-  } else {
-    status = first_buf_.second;
-    float* buf = &first_buf_.first[0];
-    frames = first_buf_.first.size();
-    for (size_t i = 0; i < frames; i++)
-      channel[i] = buf[i];
-  }
-
-  if (status <= 0) {
-    // -1 means an error, 0 means done.
-    if (status == -1)
-      tts_event_observer_->OnError();
-
-    dest->Zero();
-    StopLocked();
-    state_lock_.Release();
-    return 0;
-  }
-
-  if (frames == 0) {
-    state_lock_.Release();
-    return 0;
+  int char_index = -1;
+  bool is_first_buffer = false;
+  {
+    base::AutoLock al(state_lock_);
+    if (buffers_.empty())
+      return 0;
+
+    const AudioBuffer& buf = buffers_.front();
+
+    status = buf.status;
+
+    // Done, 0, or error, -1.
+    if (status <= 0) {
+      if (status == -1)
+        tts_event_observer_->OnError();
+      else
+        tts_event_observer_->OnEnd();
+
+      StopLocked();
+      return 0;
+    }
+
+    char_index = buf.char_index;
+    is_first_buffer = buf.is_first_buffer;
+    const float* frames = &buf.frames[0];
+    frames_in_buf = buf.frames.size();
+    float* channel = dest->channel(0);
+    for (size_t i = 0; i < frames_in_buf; i++)
+      channel[i] = frames[i];
+    buffers_.pop_front();
  }

-  if (!got_first_buffer_) {
-    got_first_buffer_ = true;
+  if (is_first_buffer)
    tts_event_observer_->OnStart();
-  }

-  // There's only really ever one timepoint since we play this buffer in one
-  // chunk.
-  int char_index = -1;
-  if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
-    char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
+  if (frames_in_buf == 0)
+    return 0;

  if (char_index != -1)
    tts_event_observer_->OnTimepoint(char_index);

-  state_lock_.Release();
-  return frames;
+  return frames_in_buf;
 }

 void TtsService::OnRenderError() {}

 void TtsService::StopLocked() {
+  if (!is_playing_)
+    return;
+
  output_device_->Pause();
-  libchrometts_.GoogleTtsFinalizeBuffered();
-  if (tts_event_observer_ && got_first_buffer_)
-    tts_event_observer_->OnEnd();
-  got_first_buffer_ = false;
+  buffers_.clear();
+  is_playing_ = false;
+}
+
+void TtsService::ReadMoreFrames(bool is_first_buffer) {
+  if (!is_playing_)
+    return;
+
+  AudioBuffer buf;
+  buf.frames.resize(libchrometts_.GoogleTtsGetFramesInAudioBuffer());
+  size_t frames_in_buf = 0;
+  buf.status =
+      libchrometts_.GoogleTtsReadBuffered(&buf.frames[0], &frames_in_buf);
+
+  buf.frames.resize(frames_in_buf);
+
+  buf.char_index = -1;
+  if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
+    buf.char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
+
+  buf.is_first_buffer = is_first_buffer;
+
+  {
+    base::AutoLock al(state_lock_);
+    buffers_.emplace_back(std::move(buf));
+  }
+
+  if (buf.status <= 0)
+    return;
+
+  base::ThreadTaskRunnerHandle::Get()->PostTask(
+      FROM_HERE,
+      base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
+                     false /* is_first_buffer */));
+}
+
+TtsService::AudioBuffer::AudioBuffer() = default;
+
+TtsService::AudioBuffer::~AudioBuffer() = default;
+
+TtsService::AudioBuffer::AudioBuffer(TtsService::AudioBuffer&& other) {
+  frames.swap(other.frames);
+  status = other.status;
+  char_index = other.char_index;
+  is_first_buffer = other.is_first_buffer;
 }

 }  // namespace tts

--- a/chromeos/services/tts/tts_service.h
+++ b/chromeos/services/tts/tts_service.h
@@ -54,6 +54,8 @@ class TtsService : public mojom::TtsService,
  // Handles stopping tts.
  void StopLocked() EXCLUSIVE_LOCKS_REQUIRED(state_lock_);

+  void ReadMoreFrames(bool is_first_buffer);
+
  // Connection to tts in the browser.
  mojo::Receiver<mojom::TtsService> service_receiver_;

@@ -61,26 +63,35 @@ class TtsService : public mojom::TtsService,
  base::Lock state_lock_;

  // Prebuilt.
-  LibChromeTtsLoader libchrometts_ GUARDED_BY(state_lock_);
+  LibChromeTtsLoader libchrometts_;

  // Connection to tts in the component extension.
-  mojo::Receiver<mojom::TtsStream> stream_receiver_ GUARDED_BY(state_lock_);
+  mojo::Receiver<mojom::TtsStream> stream_receiver_;

  // Connection to send tts events to component extension.
-  mojo::Remote<mojom::TtsEventObserver> tts_event_observer_
-      GUARDED_BY(state_lock_);
+  mojo::Remote<mojom::TtsEventObserver> tts_event_observer_;

  // Outputs speech synthesis to audio.
-  std::unique_ptr<audio::OutputDevice> output_device_ GUARDED_BY(state_lock_);
-
-  // Tracks whether any audio data came as a result of |Speak|. Reset for every
-  // call to |Speak|.
-  bool got_first_buffer_ GUARDED_BY(state_lock_);
-
-  // The first buffer; used for prefetching/warming up the engine for a new
-  // utterance. The first item is the audio data, the second is the status
-  // returned by a call to GoogleTtsReadBuffered.
-  std::pair<std::vector<float>, size_t> first_buf_;
+  std::unique_ptr<audio::OutputDevice> output_device_;
+
+  // Helper group of state to pass from main thread to audio thread.
+  struct AudioBuffer {
+    AudioBuffer();
+    ~AudioBuffer();
+    AudioBuffer(const AudioBuffer& other) = delete;
+    AudioBuffer(AudioBuffer&& other);
+
+    std::vector<float> frames;
+    int char_index;
+    int status;
+    bool is_first_buffer;
+  };
+
+  // The queue of audio buffers to be played by the audio thread.
+  std::deque<AudioBuffer> buffers_ GUARDED_BY(state_lock_);
+
+  // Tracks whether the output device is playing audio.
+  bool is_playing_ = false;
 };

 }  // namespace tts