Espeak native tts audio playback support

See the Espeak-ng repo for Chrome here for required changes there: https://chromium.googlesource.com/chromiumos/third_party/espeak-ng/ Depends on https://chromium-review.googlesource.com/c/chromiumos/overlays/chromiumos-overlay/+/2538938 (for try/catch while the cros sdk in telemetry is out of sync) and https://chromium-review.googlesource.com/c/chromiumos/overlays/chromiumos-overlay/+/2538477 (for engine changes to adhere to the mojo changes here). Bug: 1146417, 1146418, 1146420 Test: fixes issues above Change-Id: I8057a881830e5b51671770fa654f742ecd3f47b0 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2515786 Commit-Queue: David Tseng <dtseng@chromium.org> Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org> Reviewed-by: Daniel Cheng <dcheng@chromium.org> Reviewed-by: calamity <calamity@chromium.org> Cr-Commit-Position: refs/heads/master@{#829092}

Espeak native tts audio playback support
See the Espeak-ng repo for Chrome here for required changes there: https://chromium.googlesource.com/chromiumos/third_party/espeak-ng/ Depends on https://chromium-review.googlesource.com/c/chromiumos/overlays/chromiumos-overlay/+/2538938 (for try/catch while the cros sdk in telemetry is out of sync) and https://chromium-review.googlesource.com/c/chromiumos/overlays/chromiumos-overlay/+/2538477 (for engine changes to adhere to the mojo changes here). Bug: 1146417, 1146418, 1146420 Test: fixes issues above Change-Id: I8057a881830e5b51671770fa654f742ecd3f47b0 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2515786 Commit-Queue: David Tseng <dtseng@chromium.org> Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org> Reviewed-by: Daniel Cheng <dcheng@chromium.org> Reviewed-by: calamity <calamity@chromium.org> Cr-Commit-Position: refs/heads/master@{#829092}
696012e9 · David Tseng · Commit Bot · 422bc672 · 696012e9 · 696012e9
Commit 696012e9 authored Nov 19, 2020 by David Tseng Committed by Commit Bot Nov 19, 2020
15 changed files
--- a/chrome/browser/extensions/chrome_extensions_browser_interface_binders.cc
+++ b/chrome/browser/extensions/chrome_extensions_browser_interface_binders.cc
@@ -76,12 +76,12 @@ void BindHandwritingRecognizerRequestor(
 }
 #endif  // BUILDFLAG(GOOGLE_CHROME_BRANDING)

-void BindTtsStream(
+void BindTtsStreamFactory(
    content::RenderFrameHost* render_frame_host,
-    mojo::PendingReceiver<chromeos::tts::mojom::TtsStream> receiver) {
+    mojo::PendingReceiver<chromeos::tts::mojom::TtsStreamFactory> receiver) {
  TtsEngineExtensionObserverChromeOS::GetInstance(
      Profile::FromBrowserContext(render_frame_host->GetBrowserContext()))
-      ->BindTtsStream(std::move(receiver));
+      ->BindTtsStreamFactory(std::move(receiver));
 }

 void BindRemoteAppsFactory(
@@ -168,9 +168,11 @@ void PopulateChromeFrameBindersForExtension(
        base::BindRepeating(&chromeos::CameraAppUI::ConnectToCameraAppHelper));
  }

-  if (extension->id() == extension_misc::kGoogleSpeechSynthesisExtensionId) {
-    binder_map->Add<chromeos::tts::mojom::TtsStream>(
-        base::BindRepeating(&BindTtsStream));
+  // TODO: extend to more extensions.
+  if (extension->id() == extension_misc::kGoogleSpeechSynthesisExtensionId ||
+      extension->id() == extension_misc::kEspeakSpeechSynthesisExtensionId) {
+    binder_map->Add<chromeos::tts::mojom::TtsStreamFactory>(
+        base::BindRepeating(&BindTtsStreamFactory));
  }

  if (chromeos::RemoteAppsImpl::IsAllowed(render_frame_host, extension)) {

--- a/chrome/browser/speech/extension_api/tts_engine_extension_observer_chromeos.cc
+++ b/chrome/browser/speech/extension_api/tts_engine_extension_observer_chromeos.cc
@@ -144,27 +144,33 @@ TtsEngineExtensionObserverChromeOS::TtsEngineExtensionObserverChromeOS(
 TtsEngineExtensionObserverChromeOS::~TtsEngineExtensionObserverChromeOS() =
    default;

-void TtsEngineExtensionObserverChromeOS::BindTtsStream(
-    mojo::PendingReceiver<chromeos::tts::mojom::TtsStream> receiver) {
+void TtsEngineExtensionObserverChromeOS::BindTtsStreamFactory(
+    mojo::PendingReceiver<chromeos::tts::mojom::TtsStreamFactory> receiver) {
  // At this point, the component extension has loaded, and the js has requested
-  // a TtsStream be bound. It's safe now to update the keep alive count for
-  // important accessibility features. This path is also encountered if the
+  // a TtsStreamFactory be bound. It's safe now to update the keep alive count
+  // for important accessibility features. This path is also encountered if the
  // component extension background page forceably window.close(s) on error.
  UpdateGoogleSpeechSynthesisKeepAliveCountOnReload(profile_);

-  // Always launch a new TtsService. By assigning below, if |tts_service_| held
-  // a remote, it will be killed and a new one created, ensuring we only ever
-  // have one TtsService running.
-  tts_service_ =
-      content::ServiceProcessHost::Launch<chromeos::tts::mojom::TtsService>(
-          content::ServiceProcessHost::Options()
-              .WithDisplayName("TtsService")
-              .Pass());
+  // Only launch a new TtsService if one is not already held. Reusing the
+  // existing |tts_service_| remote, rather than replacing it, ensures we only
+  // ever have one TtsService running.
+  if (!tts_service_) {
+    tts_service_ =
+        content::ServiceProcessHost::Launch<chromeos::tts::mojom::TtsService>(
+            content::ServiceProcessHost::Options()
+                .WithDisplayName("TtsService")
+                .Pass());
+  }

+  // Always create a new audio stream for the tts stream. It is assumed once the
+  // tts stream is reset by the service, the audio stream is appropriately
+  // cleaned up by the audio service.
  mojo::PendingRemote<audio::mojom::StreamFactory> factory_remote;
  auto factory_receiver = factory_remote.InitWithNewPipeAndPassReceiver();
  content::GetAudioService().BindStreamFactory(std::move(factory_receiver));
-  tts_service_->BindTtsStream(std::move(receiver), std::move(factory_remote));
+  tts_service_->BindTtsStreamFactory(std::move(receiver),
+                                     std::move(factory_remote));
 }

 void TtsEngineExtensionObserverChromeOS::Shutdown() {

--- a/chrome/browser/speech/extension_api/tts_engine_extension_observer_chromeos.h
+++ b/chrome/browser/speech/extension_api/tts_engine_extension_observer_chromeos.h
@@ -33,8 +33,8 @@ class TtsEngineExtensionObserverChromeOS

  Profile* profile() { return profile_; }

-  void BindTtsStream(
-      mojo::PendingReceiver<chromeos::tts::mojom::TtsStream> receiver);
+  void BindTtsStreamFactory(
+      mojo::PendingReceiver<chromeos::tts::mojom::TtsStreamFactory> receiver);

  // Implementation of KeyedService.
  void Shutdown() override;

--- a/chrome/renderer/extensions/chrome_extensions_dispatcher_delegate.cc
+++ b/chrome/renderer/extensions/chrome_extensions_dispatcher_delegate.cc
@@ -175,9 +175,10 @@ void ChromeExtensionsDispatcherDelegate::PopulateSourceMap(
  source_map->RegisterSource("chromeos.ime.service",
                             IDR_IME_SERVICE_BINDINGS_JS);

-  source_map->RegisterSource("chromeos.tts.mojom.tts_stream.mojom",
-                             IDR_TTS_STREAM_MOJOM_JS);
-  source_map->RegisterSource("chromeos.tts.stream", IDR_TTS_STREAM_BINDINGS_JS);
+  source_map->RegisterSource("chromeos.tts.mojom.tts_stream_factory.mojom",
+                             IDR_TTS_STREAM_FACTORY_MOJOM_JS);
+  source_map->RegisterSource("chromeos.tts.stream_factory",
+                             IDR_TTS_STREAM_FACTORY_BINDINGS_JS);

  // Imprivata API.
  source_map->RegisterSource("chromeos.remote_apps.mojom-lite",

--- a/chrome/renderer/resources/extensions/chromeos_tts_stream_bindings.js
+++ b/chrome/renderer/resources/extensions/chromeos_tts_stream_bindings.js
@@ -9,11 +9,11 @@ if ((typeof mojo === 'undefined') || !mojo.bindingsLibraryInitialized) {
 }
 mojo.config.autoLoadMojomDeps = false;

-loadScript('chromeos.tts.mojom.tts_stream.mojom');
+loadScript('chromeos.tts.mojom.tts_stream_factory.mojom');

 (function() {
-  let ptr = new chromeos.tts.mojom.TtsStreamPtr;
+  let ptr = new chromeos.tts.mojom.TtsStreamFactoryPtr;
  Mojo.bindInterface(
-      chromeos.tts.mojom.TtsStream.name, mojo.makeRequest(ptr).handle);
+      chromeos.tts.mojom.TtsStreamFactory.name, mojo.makeRequest(ptr).handle);
  exports.$set('returnValue', ptr);
 })();
--- a/chrome/renderer/resources/renderer_resources.grd
+++ b/chrome/renderer/resources/renderer_resources.grd
@@ -70,8 +70,8 @@
          <include name="IDR_REMOTE_APPS_MOJOM_LITE_JS" file="${mojom_root}\chromeos\components\remote_apps\mojom\remote_apps.mojom-lite.js" use_base_dir="false" type="BINDATA" />
          <include name="IDR_MOJO_URL_MOJOM_LITE_JS" file="${mojom_root}\url\mojom\url.mojom-lite.js" use_base_dir="false" type="BINDATA" />

-          <include name="IDR_TTS_STREAM_BINDINGS_JS" file="extensions\chromeos_tts_stream_bindings.js" type="BINDATA" />
-          <include name="IDR_TTS_STREAM_MOJOM_JS" file="${mojom_root}\chromeos\services\tts\public\mojom\tts_service.mojom.js" use_base_dir="false" type="BINDATA" />
+          <include name="IDR_TTS_STREAM_FACTORY_BINDINGS_JS" file="extensions\chromeos_tts_stream_factory_bindings.js" type="BINDATA" />
+          <include name="IDR_TTS_STREAM_FACTORY_MOJOM_JS" file="${mojom_root}\chromeos\services\tts\public\mojom\tts_service.mojom.js" use_base_dir="false" type="BINDATA" />
        </if>
        <!-- Media Router Mojo service and bindings. -->
        <include name="IDR_MEDIA_CONTROLLER_MOJOM_JS" file="${mojom_root}\components\media_router\common\mojom\media_controller.mojom.js" use_base_dir="false" type="BINDATA" />

--- a/chromeos/services/tts/BUILD.gn
+++ b/chromeos/services/tts/BUILD.gn
@@ -8,6 +8,10 @@ source_set("tts") {
  sources = [
    "constants.cc",
    "constants.h",
+    "google_tts_stream.cc",
+    "google_tts_stream.h",
+    "playback_tts_stream.cc",
+    "playback_tts_stream.h",
    "tts_service.cc",
    "tts_service.h",
  ]

--- a/chromeos/services/tts/chrome_tts.h
+++ b/chromeos/services/tts/chrome_tts.h
@@ -15,10 +15,10 @@ bool GoogleTtsInit(const char* pipeline_path, const char* path_prefix);
 void GoogleTtsShutdown();

 bool GoogleTtsInstallVoice(const char* voice_name,
-                           const char* voice_bytes,
+                           const uint8_t* voice_bytes,
                           int size);

-bool GoogleTtsInitBuffered(const char* text_jspb, int text_jspb_len);
+bool GoogleTtsInitBuffered(const uint8_t* text_jspb, int text_jspb_len);

 int GoogleTtsReadBuffered(float* audio_channel_buffer, size_t* frames_written);


--- a/chromeos/services/tts/google_tts_stream.cc
+++ b/chromeos/services/tts/google_tts_stream.cc
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chromeos/services/tts/google_tts_stream.h"
+
+#include <dlfcn.h>
+#include <sys/resource.h>
+
+#include "base/files/file_util.h"
+#include "chromeos/services/tts/constants.h"
+#include "chromeos/services/tts/tts_service.h"
+
+namespace chromeos {
+namespace tts {
+
+// Forwards a log line emitted by the shared library to Chrome's logging.
+// Warnings keep their severity, errors and fatals are both reported at ERROR
+// severity, and info or any unrecognized severity is dropped.
+void HandleLibraryLogging(int severity, const char* message) {
+  if (severity == logging::LOG_WARNING) {
+    LOG(WARNING) << message;
+    return;
+  }
+
+  const bool is_failure =
+      severity == logging::LOG_ERROR || severity == logging::LOG_FATAL;
+  if (is_failure)
+    LOG(ERROR) << message;
+}
+
+// GoogleTtsStream is mostly glue code that adapts the TtsStream interface into
+// a form needed by libchrometts.so. As is convention with shared objects, the
+// lifetime of all arguments passed to the library is scoped to the function.
+//
+// To keep the library interface stable and prevent name mangling, all library
+// methods utilize C features only.
+
+// Binds |receiver| to this stream and loads libchrometts.so, routing the
+// library's log output through HandleLibraryLogging. The process exits if the
+// library cannot be loaded.
+GoogleTtsStream::GoogleTtsStream(
+    TtsService* owner,
+    mojo::PendingReceiver<mojom::GoogleTtsStream> receiver)
+    : owner_(owner), stream_receiver_(this, std::move(receiver)) {
+  bool loaded = libchrometts_.Load(kLibchromettsPath);
+  if (!loaded) {
+    LOG(ERROR) << "Unable to load libchrometts.so.";
+    exit(0);
+  } else {
+    libchrometts_.GoogleTtsSetLogger(HandleLibraryLogging);
+  }
+
+  stream_receiver_.set_disconnect_handler(base::BindOnce(
+      [](mojo::Receiver<mojom::GoogleTtsStream>* receiver,
+         TtsService* owner) {
+        // The remote which lives in component extension js has been
+        // disconnected due to destruction or error. A disconnected receiver
+        // still reports itself as bound, so drop the pipe explicitly before
+        // asking the owner whether any stream is left; otherwise IsBound()
+        // would keep returning true for this stream.
+        receiver->reset();
+        owner->MaybeExit();
+      },
+      &stream_receiver_, owner));
+}
+
+GoogleTtsStream::~GoogleTtsStream() = default;
+
+// Returns the bound state of |stream_receiver_|.
+bool GoogleTtsStream::IsBound() const {
+  return stream_receiver_.is_bound();
+}
+
+// Installs the voice |voice_name| from |voice_bytes| into a per-voice directory
+// under kTempDataDirectory and reports the outcome through |callback|.
+//
+// An already-existing voice directory is treated as a completed install and
+// reports success without calling into the library.
+// NOTE(review): a directory left behind by a failed install would also take
+// that path -- confirm whether it should be removed on failure.
+void GoogleTtsStream::InstallVoice(const std::string& voice_name,
+                                   const std::vector<uint8_t>& voice_bytes,
+                                   InstallVoiceCallback callback) {
+  // Create a directory to place extracted voice data.
+  base::FilePath voice_data_path(kTempDataDirectory);
+  voice_data_path = voice_data_path.Append(voice_name);
+  if (base::DirectoryExists(voice_data_path)) {
+    std::move(callback).Run(true);
+    return;
+  }
+
+  if (!base::CreateDirectoryAndGetError(voice_data_path, nullptr)) {
+    std::move(callback).Run(false);
+    return;
+  }
+
+  // data() is well-defined for an empty vector, unlike &voice_bytes[0].
+  std::move(callback).Run(libchrometts_.GoogleTtsInstallVoice(
+      voice_data_path.value().c_str(), voice_bytes.data(),
+      voice_bytes.size()));
+}
+
+// Initializes the library with the voice named |voice_name|, whose data is
+// looked up under kTempDataDirectory, and passes the result to |callback|.
+void GoogleTtsStream::SelectVoice(const std::string& voice_name,
+                                  SelectVoiceCallback callback) {
+  const base::FilePath voice_dir =
+      base::FilePath(kTempDataDirectory).Append(voice_name);
+  const base::FilePath pipeline_file = voice_dir.Append("pipeline");
+  const bool initialized = libchrometts_.GoogleTtsInit(
+      pipeline_file.value().c_str(), voice_dir.value().c_str());
+  std::move(callback).Run(initialized);
+}
+
+// Starts synthesis of the utterance serialized in |text_jspb|. On success,
+// playback is started through the owning service (which receives |callback|)
+// and the first ReadMoreFrames() is posted to the current task runner. If the
+// library rejects the input, the stream's receiver is reset and the owner is
+// asked whether the service should exit; |callback| is not run in that case.
+void GoogleTtsStream::Speak(const std::vector<uint8_t>& text_jspb,
+                            SpeakCallback callback) {
+  // data() is well-defined for an empty vector, unlike &text_jspb[0].
+  bool status =
+      libchrometts_.GoogleTtsInitBuffered(text_jspb.data(), text_jspb.size());
+  if (!status) {
+    stream_receiver_.reset();
+    owner_->MaybeExit();
+    return;
+  }
+
+  owner_->Play(std::move(callback));
+  is_buffering_ = true;
+
+  base::ThreadTaskRunnerHandle::Get()->PostTask(
+      FROM_HERE,
+      base::BindOnce(&GoogleTtsStream::ReadMoreFrames,
+                     weak_factory_.GetWeakPtr(), true /* is_first_buffer */));
+}
+
+// Stops playback on the owning service, turns buffering off so that any
+// pending ReadMoreFrames() returns early, and calls the library's
+// GoogleTtsFinalizeBuffered().
+void GoogleTtsStream::Stop() {
+  owner_->Stop();
+  is_buffering_ = false;
+  libchrometts_.GoogleTtsFinalizeBuffered();
+}
+
+// Forwards |volume| to the owning service.
+void GoogleTtsStream::SetVolume(float volume) {
+  owner_->SetVolume(volume);
+}
+
+// Forwards the pause request to the owning service.
+void GoogleTtsStream::Pause() {
+  owner_->Pause();
+}
+
+// Forwards the resume request to the owning service.
+void GoogleTtsStream::Resume() {
+  owner_->Resume();
+}
+
+// Pulls one buffer of synthesized audio from the library and queues it on the
+// owning service, then re-posts itself until the library reports a
+// non-positive status or buffering has been turned off.
+//
+// |is_first_buffer| is recorded on the queued buffer; it is true only for the
+// call posted by Speak().
+void GoogleTtsStream::ReadMoreFrames(bool is_first_buffer) {
+  if (!is_buffering_) {
+    return;
+  }
+
+  TtsService::AudioBuffer buf;
+  buf.frames.resize(libchrometts_.GoogleTtsGetFramesInAudioBuffer());
+  size_t frames_in_buf = 0;
+  // data() stays well-defined even if the library reports a zero-sized buffer,
+  // unlike &buf.frames[0].
+  buf.status =
+      libchrometts_.GoogleTtsReadBuffered(buf.frames.data(), &frames_in_buf);
+
+  buf.frames.resize(frames_in_buf);
+
+  buf.char_index = -1;
+  if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
+    buf.char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
+
+  buf.is_first_buffer = is_first_buffer;
+
+  // Capture the status before |buf| is moved from; the moved-from object must
+  // not be read afterwards.
+  const auto status = buf.status;
+  owner_->AddAudioBuffer(std::move(buf));
+
+  if (status <= 0)
+    return;
+
+  base::ThreadTaskRunnerHandle::Get()->PostTask(
+      FROM_HERE,
+      base::BindOnce(&GoogleTtsStream::ReadMoreFrames,
+                     weak_factory_.GetWeakPtr(), false /* is_first_buffer */));
+}
+
+}  // namespace tts
+}  // namespace chromeos
--- a/chromeos/services/tts/google_tts_stream.h
+++ b/chromeos/services/tts/google_tts_stream.h
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROMEOS_SERVICES_TTS_GOOGLE_TTS_STREAM_H_
+#define CHROMEOS_SERVICES_TTS_GOOGLE_TTS_STREAM_H_
+
+#include "chromeos/services/tts/public/mojom/tts_service.mojom.h"
+#include "library_loaders/libchrometts.h"
+#include "mojo/public/cpp/bindings/receiver.h"
+#include "mojo/public/cpp/bindings/remote.h"
+
+namespace chromeos {
+namespace tts {
+
+class TtsService;
+
+// Implementation of mojom::GoogleTtsStream backed by the prebuilt
+// libchrometts library. Audio produced by the library is handed to the owning
+// TtsService for playback.
+class GoogleTtsStream : public mojom::GoogleTtsStream {
+ public:
+  // |owner| is stored as a raw pointer; |receiver| is bound to this object.
+  GoogleTtsStream(TtsService* owner,
+                  mojo::PendingReceiver<mojom::GoogleTtsStream> receiver);
+  ~GoogleTtsStream() override;
+
+  // Returns the bound state of |stream_receiver_|.
+  bool IsBound() const;
+
+ private:
+  // mojom::GoogleTtsStream:
+  void InstallVoice(const std::string& voice_name,
+                    const std::vector<uint8_t>& voice_bytes,
+                    InstallVoiceCallback callback) override;
+  void SelectVoice(const std::string& voice_name,
+                   SelectVoiceCallback callback) override;
+  void Speak(const std::vector<uint8_t>& text_jspb,
+             SpeakCallback callback) override;
+  void Stop() override;
+  void SetVolume(float volume) override;
+  void Pause() override;
+  void Resume() override;
+
+  // Reads one buffer of audio from the library, queues it on |owner_|, and
+  // re-posts itself while more audio is available and buffering is enabled.
+  void ReadMoreFrames(bool is_first_buffer);
+
+  // Owning service.
+  TtsService* owner_;
+
+  // Prebuilt.
+  LibChromeTtsLoader libchrometts_;
+
+  // Connection to tts in the component extension.
+  mojo::Receiver<mojom::GoogleTtsStream> stream_receiver_;
+
+  // Whether buffering is in progress.
+  bool is_buffering_ = false;
+
+  // Supplies the weak pointers bound into posted ReadMoreFrames() tasks.
+  base::WeakPtrFactory<GoogleTtsStream> weak_factory_{this};
+};
+
+}  // namespace tts
+}  // namespace chromeos
+
+#endif  // CHROMEOS_SERVICES_TTS_GOOGLE_TTS_STREAM_H_
--- a/chromeos/services/tts/playback_tts_stream.cc
+++ b/chromeos/services/tts/playback_tts_stream.cc
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chromeos/services/tts/playback_tts_stream.h"
+
+#include "chromeos/services/tts/constants.h"
+#include "chromeos/services/tts/tts_service.h"
+
+namespace chromeos {
+namespace tts {
+
+// Binds |receiver| to this stream and arranges for the owning service to be
+// consulted about exiting once the remote end goes away.
+PlaybackTtsStream::PlaybackTtsStream(
+    TtsService* owner,
+    mojo::PendingReceiver<mojom::PlaybackTtsStream> receiver)
+    : owner_(owner), stream_receiver_(this, std::move(receiver)) {
+  stream_receiver_.set_disconnect_handler(base::BindOnce(
+      [](mojo::Receiver<mojom::PlaybackTtsStream>* receiver,
+         TtsService* owner) {
+        // The remote which lives in component extension js has been
+        // disconnected due to destruction or error. A disconnected receiver
+        // still reports itself as bound, so drop the pipe explicitly before
+        // asking the owner whether any stream is left; otherwise IsBound()
+        // would keep returning true for this stream.
+        receiver->reset();
+        owner->MaybeExit();
+      },
+      &stream_receiver_, owner));
+}
+
+PlaybackTtsStream::~PlaybackTtsStream() = default;
+
+// Returns the bound state of |stream_receiver_|.
+bool PlaybackTtsStream::IsBound() const {
+  return stream_receiver_.is_bound();
+}
+
+// Starts playback on the owning service, then queues a one-frame, zero-valued
+// buffer flagged as the first buffer of the utterance.
+void PlaybackTtsStream::Play(PlayCallback callback) {
+  owner_->Play(std::move(callback));
+
+  // Marker buffer that signals the start of the audio for this utterance.
+  TtsService::AudioBuffer start_marker;
+  start_marker.is_first_buffer = true;
+  start_marker.status = 1;
+  start_marker.frames.resize(1, 0);
+  owner_->AddAudioBuffer(std::move(start_marker));
+}
+
+// Queues a copy of |samples_buffer| on the owning service, tagged with
+// |char_index|. The queued buffer's status is 0 when |is_done| is set and 1
+// otherwise.
+void PlaybackTtsStream::SendAudioBuffer(
+    const std::vector<float>& samples_buffer,
+    int32_t char_index,
+    bool is_done) {
+  TtsService::AudioBuffer pending;
+  pending.char_index = char_index;
+  if (is_done)
+    pending.status = 0;
+  else
+    pending.status = 1;
+  pending.frames = samples_buffer;
+  owner_->AddAudioBuffer(std::move(pending));
+}
+
+// Forwards the stop request to the owning service.
+void PlaybackTtsStream::Stop() {
+  owner_->Stop();
+}
+
+// Forwards |volume| to the owning service.
+void PlaybackTtsStream::SetVolume(float volume) {
+  owner_->SetVolume(volume);
+}
+
+// Forwards the pause request to the owning service.
+void PlaybackTtsStream::Pause() {
+  owner_->Pause();
+}
+
+// Forwards the resume request to the owning service.
+void PlaybackTtsStream::Resume() {
+  owner_->Resume();
+}
+
+}  // namespace tts
+}  // namespace chromeos
--- a/chromeos/services/tts/playback_tts_stream.h
+++ b/chromeos/services/tts/playback_tts_stream.h
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROMEOS_SERVICES_TTS_PLAYBACK_TTS_STREAM_H_
+#define CHROMEOS_SERVICES_TTS_PLAYBACK_TTS_STREAM_H_
+
+#include "chromeos/services/tts/public/mojom/tts_service.mojom.h"
+#include "mojo/public/cpp/bindings/receiver.h"
+#include "mojo/public/cpp/bindings/remote.h"
+
+namespace chromeos {
+namespace tts {
+
+class TtsService;
+
+// Implementation of mojom::PlaybackTtsStream. It performs no synthesis itself:
+// audio buffers received over the interface are queued on the owning
+// TtsService, and playback controls are forwarded to it.
+class PlaybackTtsStream : public mojom::PlaybackTtsStream {
+ public:
+  // |owner| is stored as a raw pointer; |receiver| is bound to this object.
+  PlaybackTtsStream(TtsService* owner,
+                    mojo::PendingReceiver<mojom::PlaybackTtsStream> receiver);
+  ~PlaybackTtsStream() override;
+
+  // Returns the bound state of |stream_receiver_|.
+  bool IsBound() const;
+
+ private:
+  // mojom::PlaybackTtsStream:
+  void Play(PlayCallback callback) override;
+  void SendAudioBuffer(const std::vector<float>& samples_buffer,
+                       int32_t char_index,
+                       bool is_done) override;
+  void Stop() override;
+  void SetVolume(float volume) override;
+  void Pause() override;
+  void Resume() override;
+
+  // Owning service.
+  TtsService* owner_;
+
+  // Connection to tts in the component extension.
+  mojo::Receiver<mojom::PlaybackTtsStream> stream_receiver_;
+};
+
+}  // namespace tts
+}  // namespace chromeos
+
+#endif  // CHROMEOS_SERVICES_TTS_PLAYBACK_TTS_STREAM_H_
--- a/chromeos/services/tts/public/mojom/tts_service.mojom
+++ b/chromeos/services/tts/public/mojom/tts_service.mojom
@@ -6,17 +6,49 @@ module chromeos.tts.mojom;

 import "services/audio/public/mojom/stream_factory.mojom";

-// The main interface to the TTS engine on Chrome OS. Only used by and private
-// to the Chrome OS Google TTS engine component extension. TtsService lives in a
-// tts-sandboxed process.  TtsEngineExtensionObserver, the other end of this
-// interface, in the browser process, brokers a connection between TtsService
-// and the Google TTS engine component extension through a TtsStream, but does
-// not participate otherwise.
+// The main interface to TTS engines on Chrome OS. Only used by and private to
+// the Chrome OS Google and Espeak TTS engine component extensions. TtsService
+// lives in a tts-sandboxed process.  TtsEngineExtensionObserver, the other end
+// of this interface, in the browser process, brokers a connection between
+// TtsService and the TTS engine component extension through a TtsStreamFactory,
+// but does not participate otherwise.
+//
+// Graph of audio data flow:
+//
+//                        <-component extension<-----------------
+//                        /                                      \
+// [Google|Playback]Stream                       TtsEventObserver
+//            |                                         |
+//            -----------              -----------------
+//                       \             /
+//                       ->TtsService->
+//                           <->
+//                    AudioOutputStream
+//                           <->
+//                     AudioService
 interface TtsService {
-  // Binds a TtsStream to this service and returns an AudioOutputStream receiver
-  // which this service uses to play audio.
-  BindTtsStream(pending_receiver<TtsStream> receiver,
-                pending_remote<audio.mojom.StreamFactory> stream_factory);
+  // Binds a TtsStreamFactory implemented by this service. Returns an
+  // AudioOutputStream remote which is bound to AudioService in the browser and
+  // which pulls audio data for output.
+  BindTtsStreamFactory(pending_receiver<TtsStreamFactory> receiver,
+      pending_remote<audio.mojom.StreamFactory> stream_factory);
+};
+
+// Interface to create various types of tts streams.
+//
+// The remote is in a component extension; the receiver is the tts service
+// utility process.
+interface TtsStreamFactory {
+  // Obtains the tts stream used by the Google tts component extension.
+  CreateGoogleTtsStream() => (pending_remote<GoogleTtsStream> stream);
+
+  // Obtains the tts stream used by extensions that can perform audio generation
+  // and only require output services. Currently used only by the private Espeak
+  // tts engine extension.
+  CreatePlaybackTtsStream() => (
+      pending_remote<PlaybackTtsStream> stream,
+      int32 sample_rate,
+      int32 buffer_size);
 };

 // Interface for the Google component TTS engine to control
@@ -41,7 +73,7 @@ interface TtsService {
 //
 // Note that the component extension may call Stop() early, if the TTS api
 // wants to, for example, stop speech.
-interface TtsStream {
+interface GoogleTtsStream {
  // Forward and install the |voice_name| encoded by |voice_bytes|.
  InstallVoice(string voice_name, array<uint8> voice_bytes)
      => (bool success);
@@ -66,8 +98,59 @@ interface TtsStream {
  Resume();
 };

-// Returned to callers of TtsStream.speak(). It receives notable events
-// pertaining to the text spoken.
+// Interface for a tts engine to control the TtsService's production of audio
+// for engines like Espeak, which send raw audio data.
+//
+// The remote is in a component extension; the receiver is the tts service
+// utility process.
+//
+// Example usage:
+// Play() - starts playback of an utterance like 'hello world'
+// SendAudioBuffer(<first 1024 frames of audio>, 0, false)
+// SendAudioBuffer(<1024 more frames of audio>, -1, false)
+// ...
+// SendAudioBuffer(<1024 more frames of audio>, 4, false)
+// ...
+// SendAudioBuffer(<1024 more frames of audio>, 11, true)
+// Stop()
+//
+// Note: Espeak-ng is currently the only remote utilizing this interface. It is
+// a private component extension to Chrome OS. Its sources can be found here:
+// https://chromium.googlesource.com/chromiumos/third_party/espeak-ng
+interface PlaybackTtsStream {
+  // Start playback of audio.
+  Play()
+      => (pending_receiver<TtsEventObserver> event_observer);
+
+  // Send audio data to the tts service; expected to be called after Play,
+  // Resume and before Stop, Pause.
+  //
+  // |char_index| annotates the |frames_buffer| with the character index within
+  // the text being spoken. This is passed along in
+  // |TtsEventObserver.onTimepoint| at the moment when the buffer is being
+  // played.
+  //
+  // |last_buffer| indicates whether or not this is the last buffer of a
+  // particular tts utterance string.
+  SendAudioBuffer(
+      array<float> frames_buffer, int32 char_index, bool last_buffer);
+
+  // Stops on-going audio playback.
+  Stop();
+
+  // Sets volume of audio playback (0.0 to 1.0).
+  SetVolume(float volume);
+
+  // Pauses audio playback.
+  Pause();
+
+  // Resumes audio playback.
+  Resume();
+};
+
+// Returned to callers of GoogleTtsStream.speak() and
+// PlaybackTtsStream.Play(). It receives notable events pertaining to the text
+// spoken.
 interface TtsEventObserver {
  // TtsStream.Speak started speech playback.
  OnStart();

--- a/chromeos/services/tts/tts_service.cc
+++ b/chromeos/services/tts/tts_service.cc
@@ -16,118 +16,66 @@
 namespace chromeos {
 namespace tts {

-// Simple helper to bridge logging in the shared library to Chrome's logging.
-void HandleLibraryLogging(int severity, const char* message) {
-  switch (severity) {
-    case logging::LOG_INFO:
-      // Suppressed.
-      break;
-    case logging::LOG_WARNING:
-      LOG(WARNING) << message;
-      break;
-    case logging::LOG_ERROR:
-      LOG(ERROR) << message;
-      break;
-    default:
-      break;
-  }
-}
-
-// TtsService is mostly glue code that adapts the TtsStream interface into a
-// form needed by libchrometts.so. As is convention with shared objects, the
-// lifetime of all arguments passed to the library is scoped to the function.
-//
-// To keep the library interface stable and prevent name mangling, all library
-// methods utilize C features only.
+namespace {
+// Sample rate used for the audio output parameters and reported to clients
+// that create a playback tts stream.
+constexpr int kDefaultSampleRate = 22050;
+// Buffer size used for the audio output parameters and reported to clients
+// that create a playback tts stream.
+constexpr int kDefaultBufferSize = 512;
+}  // namespace

 TtsService::TtsService(mojo::PendingReceiver<mojom::TtsService> receiver)
-    : service_receiver_(this, std::move(receiver)), stream_receiver_(this) {
+    : service_receiver_(this, std::move(receiver)), tts_stream_factory_(this) {
  if (setpriority(PRIO_PROCESS, 0, -10 /* real time audio */) != 0) {
    PLOG(ERROR) << "Unable to request real time priority; performance will be "
                   "impacted.";
  }
-  bool loaded = libchrometts_.Load(kLibchromettsPath);
-  if (!loaded) {
-    LOG(ERROR) << "Unable to load libchrometts.so.";
-    exit(0);
-  } else {
-    libchrometts_.GoogleTtsSetLogger(HandleLibraryLogging);
-  }
 }

 TtsService::~TtsService() = default;

-void TtsService::BindTtsStream(
-    mojo::PendingReceiver<mojom::TtsStream> receiver,
+void TtsService::BindTtsStreamFactory(
+    mojo::PendingReceiver<mojom::TtsStreamFactory> receiver,
    mojo::PendingRemote<audio::mojom::StreamFactory> factory) {
-  stream_receiver_.Bind(std::move(receiver));
-  stream_receiver_.set_disconnect_handler(base::BindOnce([] {
-    // The remote which lives in component extension js has been disconnected
-    // due to destruction or error.
-    exit(0);
-  }));
+  tts_stream_factory_.Bind(std::move(receiver));

-  // TODO(accessibility): The sample rate below can change based on the audio
-  // data retrieved. Plumb this data through and re-create the output device if
-  // it changes.
-  media::AudioParameters params(
-      media::AudioParameters::AUDIO_PCM_LOW_LATENCY, media::CHANNEL_LAYOUT_MONO,
-      22050 /* sample rate */, libchrometts_.GoogleTtsGetFramesInAudioBuffer());
+  // TODO(accessibility): make it possible to change this dynamically.
+  media::AudioParameters params(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
+                                media::CHANNEL_LAYOUT_MONO, kDefaultSampleRate,
+                                kDefaultBufferSize);

  output_device_ = std::make_unique<audio::OutputDevice>(
      std::move(factory), params, this, std::string());
 }

-void TtsService::InstallVoice(const std::string& voice_name,
-                              const std::vector<uint8_t>& voice_bytes,
-                              InstallVoiceCallback callback) {
-  // Create a directory to place extracted voice data.
-  base::FilePath voice_data_path(kTempDataDirectory);
-  voice_data_path = voice_data_path.Append(voice_name);
-  if (base::DirectoryExists(voice_data_path)) {
-    std::move(callback).Run(true);
-    return;
-  }
-
-  if (!base::CreateDirectoryAndGetError(voice_data_path, nullptr)) {
-    std::move(callback).Run(false);
-    return;
-  }
-
-  std::move(callback).Run(libchrometts_.GoogleTtsInstallVoice(
-      voice_data_path.value().c_str(), (char*)&voice_bytes[0],
-      voice_bytes.size()));
+// Replaces |google_tts_stream_| with a new stream bound to a fresh message
+// pipe and hands the remote end of that pipe to |callback|.
+void TtsService::CreateGoogleTtsStream(CreateGoogleTtsStreamCallback callback) {
+  mojo::PendingRemote<mojom::GoogleTtsStream> stream_remote;
+  google_tts_stream_ = std::make_unique<GoogleTtsStream>(
+      this, stream_remote.InitWithNewPipeAndPassReceiver());
+  std::move(callback).Run(std::move(stream_remote));
 }

-void TtsService::SelectVoice(const std::string& voice_name,
-                             SelectVoiceCallback callback) {
-  base::FilePath path_prefix =
-      base::FilePath(kTempDataDirectory).Append(voice_name);
-  base::FilePath pipeline_path = path_prefix.Append("pipeline");
-  std::move(callback).Run(libchrometts_.GoogleTtsInit(
-      pipeline_path.value().c_str(), path_prefix.value().c_str()));
+// Replaces |playback_tts_stream_| with a new stream bound to a fresh message
+// pipe and hands the remote end to |callback| together with the default
+// sample rate and buffer size.
+void TtsService::CreatePlaybackTtsStream(
+    CreatePlaybackTtsStreamCallback callback) {
+  mojo::PendingRemote<mojom::PlaybackTtsStream> stream_remote;
+  playback_tts_stream_ = std::make_unique<PlaybackTtsStream>(
+      this, stream_remote.InitWithNewPipeAndPassReceiver());
+  std::move(callback).Run(std::move(stream_remote), kDefaultSampleRate,
+                          kDefaultBufferSize);
 }

-void TtsService::Speak(const std::vector<uint8_t>& text_jspb,
-                       SpeakCallback callback) {
+void TtsService::Play(
+    base::OnceCallback<void(::mojo::PendingReceiver<mojom::TtsEventObserver>)>
+        callback) {
  tts_event_observer_.reset();
  auto pending_receiver = tts_event_observer_.BindNewPipeAndPassReceiver();
  std::move(callback).Run(std::move(pending_receiver));

-  bool status = libchrometts_.GoogleTtsInitBuffered((char*)&text_jspb[0],
-                                                    text_jspb.size());
-  if (!status) {
-    tts_event_observer_->OnError();
-    return;
-  }
-
  output_device_->Play();
+}

-  is_playing_ = true;
-  base::ThreadTaskRunnerHandle::Get()->PostTask(
-      FROM_HERE,
-      base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
-                     true /* is_first_buffer */));
+// Appends |buf| to |buffers_| while holding |state_lock_|.
+void TtsService::AddAudioBuffer(AudioBuffer buf) {
+  base::AutoLock al(state_lock_);
+  buffers_.emplace_back(std::move(buf));
 }

 void TtsService::Stop() {
@@ -148,6 +96,13 @@ void TtsService::Resume() {
  output_device_->Play();
 }

+void TtsService::MaybeExit() {
+  if ((!google_tts_stream_ || !google_tts_stream_->IsBound()) &&
+      (!playback_tts_stream_ || !playback_tts_stream_->IsBound())) {
+    exit(0);  // A never-created stream counts as unbound.
+  }
+}
+
 int TtsService::Render(base::TimeDelta delay,
                       base::TimeTicks delay_timestamp,
                       int prior_frames_skipped,
@@ -164,7 +119,6 @@ int TtsService::Render(base::TimeDelta delay,
    const AudioBuffer& buf = buffers_.front();

    status = buf.status;
-
    // Done, 0, or error, -1.
    if (status <= 0) {
      if (status == -1)
@@ -201,48 +155,9 @@ int TtsService::Render(base::TimeDelta delay,
 void TtsService::OnRenderError() {}

 void TtsService::StopLocked(bool clear_buffers) {
-  if (!is_playing_)
-    return;
-
  output_device_->Pause();
-  if (clear_buffers) {
+  if (clear_buffers)
    buffers_.clear();
-    libchrometts_.GoogleTtsFinalizeBuffered();
-  }
-
-  is_playing_ = false;
-}
-
-void TtsService::ReadMoreFrames(bool is_first_buffer) {
-  if (!is_playing_)
-    return;
-
-  AudioBuffer buf;
-  buf.frames.resize(libchrometts_.GoogleTtsGetFramesInAudioBuffer());
-  size_t frames_in_buf = 0;
-  buf.status =
-      libchrometts_.GoogleTtsReadBuffered(&buf.frames[0], &frames_in_buf);
-
-  buf.frames.resize(frames_in_buf);
-
-  buf.char_index = -1;
-  if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
-    buf.char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
-
-  buf.is_first_buffer = is_first_buffer;
-
-  {
-    base::AutoLock al(state_lock_);
-    buffers_.emplace_back(std::move(buf));
-  }
-
-  if (buf.status <= 0)
-    return;
-
-  base::ThreadTaskRunnerHandle::Get()->PostTask(
-      FROM_HERE,
-      base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
-                     false /* is_first_buffer */));
 }

 TtsService::AudioBuffer::AudioBuffer() = default;

--- a/chromeos/services/tts/tts_service.h
+++ b/chromeos/services/tts/tts_service.h
@@ -7,6 +7,8 @@

 #include "base/synchronization/lock.h"
 #include "base/thread_annotations.h"
+#include "chromeos/services/tts/google_tts_stream.h"
+#include "chromeos/services/tts/playback_tts_stream.h"
 #include "chromeos/services/tts/public/mojom/tts_service.mojom.h"
 #include "library_loaders/libchrometts.h"
 #include "media/base/audio_renderer_sink.h"
@@ -21,30 +23,48 @@ namespace chromeos {
 namespace tts {

 class TtsService : public mojom::TtsService,
-                   public mojom::TtsStream,
+                   public mojom::TtsStreamFactory,
                   public media::AudioRendererSink::RenderCallback {
 public:
+  // Helper group of state to pass from main thread to audio thread.
+  struct AudioBuffer {
+    AudioBuffer();
+    ~AudioBuffer();
+    AudioBuffer(const AudioBuffer& other) = delete;  // Not copyable.
+    AudioBuffer(AudioBuffer&& other);
+
+    std::vector<float> frames;
+    int char_index = -1;
+    int status = 0;  // Render() treats 0 as done and -1 as error.
+    bool is_first_buffer = false;
+  };
+
  explicit TtsService(mojo::PendingReceiver<mojom::TtsService> receiver);
  ~TtsService() override;

+  // Audio operations.
+  void Play(
+      base::OnceCallback<void(::mojo::PendingReceiver<mojom::TtsEventObserver>)>
+          callback);
+  void AddAudioBuffer(AudioBuffer buf);
+  void Stop();
+  void SetVolume(float volume);
+  void Pause();
+  void Resume();
+
+  // Exits this process if the TTS streams are no longer bound; else a no-op.
+  void MaybeExit();
+
 private:
  // mojom::TtsService:
-  void BindTtsStream(
-      mojo::PendingReceiver<mojom::TtsStream> receiver,
+  void BindTtsStreamFactory(
+      mojo::PendingReceiver<mojom::TtsStreamFactory> receiver,
      mojo::PendingRemote<audio::mojom::StreamFactory> factory) override;

-  // mojom::TtsStream:
-  void InstallVoice(const std::string& voice_name,
-                    const std::vector<uint8_t>& voice_bytes,
-                    InstallVoiceCallback callback) override;
-  void SelectVoice(const std::string& voice_name,
-                   SelectVoiceCallback callback) override;
-  void Speak(const std::vector<uint8_t>& text_jspb,
-             SpeakCallback callback) override;
-  void Stop() override;
-  void SetVolume(float volume) override;
-  void Pause() override;
-  void Resume() override;
+  // mojom::TtsStreamFactory:
+  void CreateGoogleTtsStream(CreateGoogleTtsStreamCallback callback) override;
+  void CreatePlaybackTtsStream(
+      CreatePlaybackTtsStreamCallback callback) override;

  // media::AudioRendererSink::RenderCallback:
  int Render(base::TimeDelta delay,
@@ -57,19 +77,18 @@ class TtsService : public mojom::TtsService,
  void StopLocked(bool clear_buffers = true)
      EXCLUSIVE_LOCKS_REQUIRED(state_lock_);

-  void ReadMoreFrames(bool is_first_buffer);
-
  // Connection to tts in the browser.
  mojo::Receiver<mojom::TtsService> service_receiver_;

-  // Protects access to state from main thread and audio thread.
-  base::Lock state_lock_;
+  // Factory creating various types of streams.
+  mojo::Receiver<mojom::TtsStreamFactory> tts_stream_factory_;

-  // Prebuilt.
-  LibChromeTtsLoader libchrometts_;
+  std::unique_ptr<GoogleTtsStream> google_tts_stream_;

-  // Connection to tts in the component extension.
-  mojo::Receiver<mojom::TtsStream> stream_receiver_;
+  std::unique_ptr<PlaybackTtsStream> playback_tts_stream_;
+
+  // Protects access to state from main thread and audio thread.
+  base::Lock state_lock_;

  // Connection to send tts events to component extension.
  mojo::Remote<mojom::TtsEventObserver> tts_event_observer_;
@@ -77,24 +96,8 @@ class TtsService : public mojom::TtsService,
  // Outputs speech synthesis to audio.
  std::unique_ptr<audio::OutputDevice> output_device_;

-  // Helper group of state to pass from main thread to audio thread.
-  struct AudioBuffer {
-    AudioBuffer();
-    ~AudioBuffer();
-    AudioBuffer(const AudioBuffer& other) = delete;
-    AudioBuffer(AudioBuffer&& other);
-
-    std::vector<float> frames;
-    int char_index;
-    int status;
-    bool is_first_buffer;
-  };
-
  // The queue of audio buffers to be played by the audio thread.
  std::deque<AudioBuffer> buffers_ GUARDED_BY(state_lock_);
-
-  // Tracks whether the output device is playing audio.
-  bool is_playing_ = false;
 };

 }  // namespace tts