Commit 696012e9, authored by David Tseng and committed by Commit Bot

Espeak native tts audio playback support

See the Espeak-ng repo for Chrome here for required changes there:
https://chromium.googlesource.com/chromiumos/third_party/espeak-ng/

Depends on
https://chromium-review.googlesource.com/c/chromiumos/overlays/chromiumos-overlay/+/2538938
(for try/catch while the cros sdk in telemetry is out of sync)
and
https://chromium-review.googlesource.com/c/chromiumos/overlays/chromiumos-overlay/+/2538477
(for engine changes to adhere to the mojo changes here).

Bug: 1146417, 1146418, 1146420
Test: fixes issues above
Change-Id: I8057a881830e5b51671770fa654f742ecd3f47b0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2515786
Commit-Queue: David Tseng <dtseng@chromium.org>
Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org>
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: calamity <calamity@chromium.org>
Cr-Commit-Position: refs/heads/master@{#829092}
parent 422bc672
......@@ -76,12 +76,12 @@ void BindHandwritingRecognizerRequestor(
}
#endif // BUILDFLAG(GOOGLE_CHROME_BRANDING)
void BindTtsStream(
void BindTtsStreamFactory(
content::RenderFrameHost* render_frame_host,
mojo::PendingReceiver<chromeos::tts::mojom::TtsStream> receiver) {
mojo::PendingReceiver<chromeos::tts::mojom::TtsStreamFactory> receiver) {
TtsEngineExtensionObserverChromeOS::GetInstance(
Profile::FromBrowserContext(render_frame_host->GetBrowserContext()))
->BindTtsStream(std::move(receiver));
->BindTtsStreamFactory(std::move(receiver));
}
void BindRemoteAppsFactory(
......@@ -168,9 +168,11 @@ void PopulateChromeFrameBindersForExtension(
base::BindRepeating(&chromeos::CameraAppUI::ConnectToCameraAppHelper));
}
if (extension->id() == extension_misc::kGoogleSpeechSynthesisExtensionId) {
binder_map->Add<chromeos::tts::mojom::TtsStream>(
base::BindRepeating(&BindTtsStream));
// TODO: extend to more extensions.
if (extension->id() == extension_misc::kGoogleSpeechSynthesisExtensionId ||
extension->id() == extension_misc::kEspeakSpeechSynthesisExtensionId) {
binder_map->Add<chromeos::tts::mojom::TtsStreamFactory>(
base::BindRepeating(&BindTtsStreamFactory));
}
if (chromeos::RemoteAppsImpl::IsAllowed(render_frame_host, extension)) {
......
......@@ -144,27 +144,33 @@ TtsEngineExtensionObserverChromeOS::TtsEngineExtensionObserverChromeOS(
TtsEngineExtensionObserverChromeOS::~TtsEngineExtensionObserverChromeOS() =
default;
void TtsEngineExtensionObserverChromeOS::BindTtsStream(
mojo::PendingReceiver<chromeos::tts::mojom::TtsStream> receiver) {
void TtsEngineExtensionObserverChromeOS::BindTtsStreamFactory(
mojo::PendingReceiver<chromeos::tts::mojom::TtsStreamFactory> receiver) {
// At this point, the component extension has loaded, and the js has requested
// a TtsStream be bound. It's safe now to update the keep alive count for
// important accessibility features. This path is also encountered if the
// a TtsStreamFactory be bound. It's safe now to update the keep alive count
// for important accessibility features. This path is also encountered if the
// component extension background page forcibly window.close(s) on error.
UpdateGoogleSpeechSynthesisKeepAliveCountOnReload(profile_);
// Always launch a new TtsService. By assigning below, if |tts_service_| held
// a remote, it will be killed and a new one created, ensuring we only ever
// have one TtsService running.
tts_service_ =
content::ServiceProcessHost::Launch<chromeos::tts::mojom::TtsService>(
content::ServiceProcessHost::Options()
.WithDisplayName("TtsService")
.Pass());
// Only launch a new TtsService if one is not already bound. Reusing the
// existing |tts_service_| remote ensures we only ever have one TtsService
// running.
if (!tts_service_) {
tts_service_ =
content::ServiceProcessHost::Launch<chromeos::tts::mojom::TtsService>(
content::ServiceProcessHost::Options()
.WithDisplayName("TtsService")
.Pass());
}
// Always create a new audio stream for the tts stream. It is assumed once the
// tts stream is reset by the service, the audio stream is appropriately
// cleaned up by the audio service.
mojo::PendingRemote<audio::mojom::StreamFactory> factory_remote;
auto factory_receiver = factory_remote.InitWithNewPipeAndPassReceiver();
content::GetAudioService().BindStreamFactory(std::move(factory_receiver));
tts_service_->BindTtsStream(std::move(receiver), std::move(factory_remote));
tts_service_->BindTtsStreamFactory(std::move(receiver),
std::move(factory_remote));
}
void TtsEngineExtensionObserverChromeOS::Shutdown() {
......
......@@ -33,8 +33,8 @@ class TtsEngineExtensionObserverChromeOS
Profile* profile() { return profile_; }
void BindTtsStream(
mojo::PendingReceiver<chromeos::tts::mojom::TtsStream> receiver);
void BindTtsStreamFactory(
mojo::PendingReceiver<chromeos::tts::mojom::TtsStreamFactory> receiver);
// Implementation of KeyedService.
void Shutdown() override;
......
......@@ -175,9 +175,10 @@ void ChromeExtensionsDispatcherDelegate::PopulateSourceMap(
source_map->RegisterSource("chromeos.ime.service",
IDR_IME_SERVICE_BINDINGS_JS);
source_map->RegisterSource("chromeos.tts.mojom.tts_stream.mojom",
IDR_TTS_STREAM_MOJOM_JS);
source_map->RegisterSource("chromeos.tts.stream", IDR_TTS_STREAM_BINDINGS_JS);
source_map->RegisterSource("chromeos.tts.mojom.tts_stream_factory.mojom",
IDR_TTS_STREAM_FACTORY_MOJOM_JS);
source_map->RegisterSource("chromeos.tts.stream_factory",
IDR_TTS_STREAM_FACTORY_BINDINGS_JS);
// Imprivata API.
source_map->RegisterSource("chromeos.remote_apps.mojom-lite",
......
......@@ -9,11 +9,11 @@ if ((typeof mojo === 'undefined') || !mojo.bindingsLibraryInitialized) {
}
mojo.config.autoLoadMojomDeps = false;
loadScript('chromeos.tts.mojom.tts_stream.mojom');
loadScript('chromeos.tts.mojom.tts_stream_factory.mojom');
(function() {
let ptr = new chromeos.tts.mojom.TtsStreamPtr;
let ptr = new chromeos.tts.mojom.TtsStreamFactoryPtr;
Mojo.bindInterface(
chromeos.tts.mojom.TtsStream.name, mojo.makeRequest(ptr).handle);
chromeos.tts.mojom.TtsStreamFactory.name, mojo.makeRequest(ptr).handle);
exports.$set('returnValue', ptr);
})();
......@@ -70,8 +70,8 @@
<include name="IDR_REMOTE_APPS_MOJOM_LITE_JS" file="${mojom_root}\chromeos\components\remote_apps\mojom\remote_apps.mojom-lite.js" use_base_dir="false" type="BINDATA" />
<include name="IDR_MOJO_URL_MOJOM_LITE_JS" file="${mojom_root}\url\mojom\url.mojom-lite.js" use_base_dir="false" type="BINDATA" />
<include name="IDR_TTS_STREAM_BINDINGS_JS" file="extensions\chromeos_tts_stream_bindings.js" type="BINDATA" />
<include name="IDR_TTS_STREAM_MOJOM_JS" file="${mojom_root}\chromeos\services\tts\public\mojom\tts_service.mojom.js" use_base_dir="false" type="BINDATA" />
<include name="IDR_TTS_STREAM_FACTORY_BINDINGS_JS" file="extensions\chromeos_tts_stream_factory_bindings.js" type="BINDATA" />
<include name="IDR_TTS_STREAM_FACTORY_MOJOM_JS" file="${mojom_root}\chromeos\services\tts\public\mojom\tts_service.mojom.js" use_base_dir="false" type="BINDATA" />
</if>
<!-- Media Router Mojo service and bindings. -->
<include name="IDR_MEDIA_CONTROLLER_MOJOM_JS" file="${mojom_root}\components\media_router\common\mojom\media_controller.mojom.js" use_base_dir="false" type="BINDATA" />
......
......@@ -8,6 +8,10 @@ source_set("tts") {
sources = [
"constants.cc",
"constants.h",
"google_tts_stream.cc",
"google_tts_stream.h",
"playback_tts_stream.cc",
"playback_tts_stream.h",
"tts_service.cc",
"tts_service.h",
]
......
......@@ -15,10 +15,10 @@ bool GoogleTtsInit(const char* pipeline_path, const char* path_prefix);
void GoogleTtsShutdown();
bool GoogleTtsInstallVoice(const char* voice_name,
const char* voice_bytes,
const uint8_t* voice_bytes,
int size);
bool GoogleTtsInitBuffered(const char* text_jspb, int text_jspb_len);
bool GoogleTtsInitBuffered(const uint8_t* text_jspb, int text_jspb_len);
int GoogleTtsReadBuffered(float* audio_channel_buffer, size_t* frames_written);
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/services/tts/google_tts_stream.h"
#include <dlfcn.h>
#include <sys/resource.h>
#include "base/files/file_util.h"
#include "chromeos/services/tts/constants.h"
#include "chromeos/services/tts/tts_service.h"
namespace chromeos {
namespace tts {
// Bridges log messages emitted by the shared library into Chrome's logging.
// Warnings are logged as warnings; errors and fatals are both logged as
// errors. Info-level messages and unrecognized severities are dropped.
void HandleLibraryLogging(int severity, const char* message) {
  if (severity == logging::LOG_WARNING) {
    LOG(WARNING) << message;
    return;
  }

  const bool is_error_like =
      severity == logging::LOG_ERROR || severity == logging::LOG_FATAL;
  if (is_error_like) {
    LOG(ERROR) << message;
  }
}
// GoogleTtsStream is mostly glue code that adapts the TtsStream interface into
// a form needed by libchrometts.so. As is convention with shared objects, the
// lifetime of all arguments passed to the library is scoped to the function.
//
// To keep the library interface stable and prevent name mangling, all library
// methods utilize C features only.
// Binds |receiver| to this stream, loads libchrometts.so and installs the
// logging bridge. If the library cannot be loaded, an error is logged and the
// process exits. |owner| is notified through MaybeExit() when the remote end
// of |receiver| disconnects.
GoogleTtsStream::GoogleTtsStream(
    TtsService* owner,
    mojo::PendingReceiver<mojom::GoogleTtsStream> receiver)
    : owner_(owner), stream_receiver_(this, std::move(receiver)) {
  if (!libchrometts_.Load(kLibchromettsPath)) {
    LOG(ERROR) << "Unable to load libchrometts.so.";
    exit(0);
  }
  libchrometts_.GoogleTtsSetLogger(HandleLibraryLogging);

  // The remote lives in the component extension's js; a disconnect means it
  // was destroyed or hit an error, so let the service decide whether to exit.
  stream_receiver_.set_disconnect_handler(base::BindOnce(
      [](TtsService* service) { service->MaybeExit(); }, owner));
}
// Defaulted destructor; no explicit teardown is performed here.
GoogleTtsStream::~GoogleTtsStream() = default;
// Returns whether the mojo receiver backing this stream is currently bound.
bool GoogleTtsStream::IsBound() const {
return stream_receiver_.is_bound();
}
void GoogleTtsStream::InstallVoice(const std::string& voice_name,
const std::vector<uint8_t>& voice_bytes,
InstallVoiceCallback callback) {
// Create a directory to place extracted voice data.
base::FilePath voice_data_path(kTempDataDirectory);
voice_data_path = voice_data_path.Append(voice_name);
if (base::DirectoryExists(voice_data_path)) {
std::move(callback).Run(true);
return;
}
if (!base::CreateDirectoryAndGetError(voice_data_path, nullptr)) {
std::move(callback).Run(false);
return;
}
std::move(callback).Run(libchrometts_.GoogleTtsInstallVoice(
voice_data_path.value().c_str(), &voice_bytes[0], voice_bytes.size()));
}
void GoogleTtsStream::SelectVoice(const std::string& voice_name,
SelectVoiceCallback callback) {
base::FilePath path_prefix =
base::FilePath(kTempDataDirectory).Append(voice_name);
base::FilePath pipeline_path = path_prefix.Append("pipeline");
std::move(callback).Run(libchrometts_.GoogleTtsInit(
pipeline_path.value().c_str(), path_prefix.value().c_str()));
}
// Starts synthesis and playback of the utterance encoded by |text_jspb|.
//
// If the library rejects the utterance, the stream receiver is reset and the
// owning service is asked whether it should exit; |callback| is not run in
// that case. Otherwise |callback| is handed to the owning service's Play()
// and the first ReadMoreFrames task is posted.
void GoogleTtsStream::Speak(const std::vector<uint8_t>& text_jspb,
                            SpeakCallback callback) {
  // Use data() rather than &text_jspb[0]; indexing an empty vector is
  // undefined behavior.
  const bool status = libchrometts_.GoogleTtsInitBuffered(text_jspb.data(),
                                                          text_jspb.size());
  if (!status) {
    // Reset the receiver before |callback| is dropped unanswered.
    stream_receiver_.reset();
    owner_->MaybeExit();
    return;
  }

  owner_->Play(std::move(callback));

  is_buffering_ = true;
  base::ThreadTaskRunnerHandle::Get()->PostTask(
      FROM_HERE,
      base::BindOnce(&GoogleTtsStream::ReadMoreFrames,
                     weak_factory_.GetWeakPtr(), true /* is_first_buffer */));
}
// Stops playback in the owning service, clears |is_buffering_| so that any
// pending ReadMoreFrames task returns early, and finalizes the library's
// buffered synthesis.
void GoogleTtsStream::Stop() {
owner_->Stop();
is_buffering_ = false;
libchrometts_.GoogleTtsFinalizeBuffered();
}
// Forwards the requested |volume| to the owning service.
void GoogleTtsStream::SetVolume(float volume) {
owner_->SetVolume(volume);
}
// Forwards the pause request to the owning service.
void GoogleTtsStream::Pause() {
owner_->Pause();
}
// Forwards the resume request to the owning service.
void GoogleTtsStream::Resume() {
owner_->Resume();
}
// Reads the next buffer of synthesized audio from the library, queues it on
// the owning service, and reposts itself until the library reports a status
// of zero or less. Does nothing if buffering has been stopped.
//
// |is_first_buffer| marks the first buffer of an utterance.
void GoogleTtsStream::ReadMoreFrames(bool is_first_buffer) {
  if (!is_buffering_)
    return;

  TtsService::AudioBuffer buf;
  buf.frames.resize(libchrometts_.GoogleTtsGetFramesInAudioBuffer());

  // Use data() rather than &buf.frames[0]; indexing is undefined behavior if
  // the library reports a zero-sized audio buffer.
  size_t frames_in_buf = 0;
  buf.status =
      libchrometts_.GoogleTtsReadBuffered(buf.frames.data(), &frames_in_buf);
  buf.frames.resize(frames_in_buf);

  // -1 means this buffer carries no character index.
  buf.char_index = -1;
  if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
    buf.char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);

  buf.is_first_buffer = is_first_buffer;

  // Capture the status before |buf| is moved; reading a moved-from object
  // afterwards would be a use-after-move.
  const int status = buf.status;
  owner_->AddAudioBuffer(std::move(buf));

  if (status <= 0)
    return;

  base::ThreadTaskRunnerHandle::Get()->PostTask(
      FROM_HERE,
      base::BindOnce(&GoogleTtsStream::ReadMoreFrames,
                     weak_factory_.GetWeakPtr(), false /* is_first_buffer */));
}
} // namespace tts
} // namespace chromeos
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMEOS_SERVICES_TTS_GOOGLE_TTS_STREAM_H_
#define CHROMEOS_SERVICES_TTS_GOOGLE_TTS_STREAM_H_
#include "chromeos/services/tts/public/mojom/tts_service.mojom.h"
#include "library_loaders/libchrometts.h"
#include "mojo/public/cpp/bindings/receiver.h"
#include "mojo/public/cpp/bindings/remote.h"
namespace chromeos {
namespace tts {
class TtsService;
// Implements mojom::GoogleTtsStream on top of the prebuilt libchrometts
// library. Synthesis requests go to the library; playback control and the
// resulting audio buffers go to the owning TtsService.
class GoogleTtsStream : public mojom::GoogleTtsStream {
public:
// Binds |receiver| to this object. |owner| is stored as a raw pointer and
// is called back for playback and on disconnect.
GoogleTtsStream(TtsService* owner,
mojo::PendingReceiver<mojom::GoogleTtsStream> receiver);
~GoogleTtsStream() override;
// Returns whether |stream_receiver_| is currently bound.
bool IsBound() const;
private:
// mojom::GoogleTtsStream:
void InstallVoice(const std::string& voice_name,
const std::vector<uint8_t>& voice_bytes,
InstallVoiceCallback callback) override;
void SelectVoice(const std::string& voice_name,
SelectVoiceCallback callback) override;
void Speak(const std::vector<uint8_t>& text_jspb,
SpeakCallback callback) override;
void Stop() override;
void SetVolume(float volume) override;
void Pause() override;
void Resume() override;
// Reads one buffer of audio from the library, hands it to |owner_|, and
// reposts itself while the library reports more data. |is_first_buffer|
// marks the first buffer of an utterance.
void ReadMoreFrames(bool is_first_buffer);
// Owning service.
TtsService* owner_;
// Prebuilt.
LibChromeTtsLoader libchrometts_;
// Connection to tts in the component extension.
mojo::Receiver<mojom::GoogleTtsStream> stream_receiver_;
// Whether buffering is in progress.
bool is_buffering_ = false;
// Supplies the weak pointers bound into posted ReadMoreFrames tasks.
base::WeakPtrFactory<GoogleTtsStream> weak_factory_{this};
};
} // namespace tts
} // namespace chromeos
#endif // CHROMEOS_SERVICES_TTS_GOOGLE_TTS_STREAM_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/services/tts/playback_tts_stream.h"
#include "chromeos/services/tts/constants.h"
#include "chromeos/services/tts/tts_service.h"
namespace chromeos {
namespace tts {
// Binds |receiver| to this stream and arranges for |owner| to be notified
// through MaybeExit() when the remote end disconnects.
PlaybackTtsStream::PlaybackTtsStream(
    TtsService* owner,
    mojo::PendingReceiver<mojom::PlaybackTtsStream> receiver)
    : owner_(owner), stream_receiver_(this, std::move(receiver)) {
  // The remote lives in the component extension's js; a disconnect means it
  // was destroyed or hit an error, so let the service decide whether to exit.
  stream_receiver_.set_disconnect_handler(base::BindOnce(
      [](TtsService* service) { service->MaybeExit(); }, owner));
}
// Defaulted destructor; no explicit teardown is performed here.
PlaybackTtsStream::~PlaybackTtsStream() = default;
// Returns whether the mojo receiver backing this stream is currently bound.
bool PlaybackTtsStream::IsBound() const {
return stream_receiver_.is_bound();
}
// Starts playback on the owning service, handing it |callback|, then queues
// a one-frame silent buffer flagged as the first buffer of the utterance.
void PlaybackTtsStream::Play(PlayCallback callback) {
  owner_->Play(std::move(callback));

  // A single zero-valued frame that signals the start of this utterance's
  // audio.
  TtsService::AudioBuffer start_marker;
  start_marker.is_first_buffer = true;
  start_marker.status = 1;
  start_marker.frames.assign(1, 0.0f);
  owner_->AddAudioBuffer(std::move(start_marker));
}
void PlaybackTtsStream::SendAudioBuffer(
const std::vector<float>& samples_buffer,
int32_t char_index,
bool is_done) {
TtsService::AudioBuffer buf;
buf.frames = samples_buffer;
buf.status = is_done ? 0 : 1;
buf.char_index = char_index;
owner_->AddAudioBuffer(std::move(buf));
}
// Forwards the stop request to the owning service.
void PlaybackTtsStream::Stop() {
owner_->Stop();
}
// Forwards the requested |volume| to the owning service.
void PlaybackTtsStream::SetVolume(float volume) {
owner_->SetVolume(volume);
}
// Forwards the pause request to the owning service.
void PlaybackTtsStream::Pause() {
owner_->Pause();
}
// Forwards the resume request to the owning service.
void PlaybackTtsStream::Resume() {
owner_->Resume();
}
} // namespace tts
} // namespace chromeos
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMEOS_SERVICES_TTS_PLAYBACK_TTS_STREAM_H_
#define CHROMEOS_SERVICES_TTS_PLAYBACK_TTS_STREAM_H_
#include "chromeos/services/tts/public/mojom/tts_service.mojom.h"
#include "mojo/public/cpp/bindings/receiver.h"
#include "mojo/public/cpp/bindings/remote.h"
namespace chromeos {
namespace tts {
class TtsService;
// Implements mojom::PlaybackTtsStream for engines that generate their own
// audio. Incoming audio buffers and playback control calls are handed to the
// owning TtsService.
class PlaybackTtsStream : public mojom::PlaybackTtsStream {
public:
// Binds |receiver| to this object. |owner| is stored as a raw pointer and
// is called back for playback and on disconnect.
PlaybackTtsStream(TtsService* owner,
mojo::PendingReceiver<mojom::PlaybackTtsStream> receiver);
~PlaybackTtsStream() override;
// Returns whether |stream_receiver_| is currently bound.
bool IsBound() const;
private:
// mojom::PlaybackTtsStream:
void Play(PlayCallback callback) override;
void SendAudioBuffer(const std::vector<float>& samples_buffer,
int32_t char_index,
bool is_done) override;
void Stop() override;
void SetVolume(float volume) override;
void Pause() override;
void Resume() override;
// Owning service.
TtsService* owner_;
// Connection to tts in the component extension.
mojo::Receiver<mojom::PlaybackTtsStream> stream_receiver_;
};
} // namespace tts
} // namespace chromeos
#endif // CHROMEOS_SERVICES_TTS_PLAYBACK_TTS_STREAM_H_
......@@ -6,17 +6,49 @@ module chromeos.tts.mojom;
import "services/audio/public/mojom/stream_factory.mojom";
// The main interface to the TTS engine on Chrome OS. Only used by and private
// to the Chrome OS Google TTS engine component extension. TtsService lives in a
// tts-sandboxed process. TtsEngineExtensionObserver, the other end of this
// interface, in the browser process, brokers a connection between TtsService
// and the Google TTS engine component extension through a TtsStream, but does
// not participate otherwise.
// The main interface to TTS engines on Chrome OS. Only used by and private to
// the Chrome OS Google and Espeak TTS engine component extensions. TtsService
// lives in a tts-sandboxed process. TtsEngineExtensionObserver, the other end
// of this interface, in the browser process, brokers a connection between
// TtsService and the TTS engine component extension through a TtsStreamFactory,
// but does not participate otherwise.
//
// Graph of audio data flow:
//
// <-component extension<-----------------
// / \
// [Google|Playback]Stream TtsEventObserver
// | |
// ----------- -----------------
// \ /
// ->TtsService->
// <->
// AudioOutputStream
// <->
// AudioService
interface TtsService {
// Binds a TtsStream to this service and returns an AudioOutputStream receiver
// which this service uses to play audio.
BindTtsStream(pending_receiver<TtsStream> receiver,
pending_remote<audio.mojom.StreamFactory> stream_factory);
// Binds a TtsStreamFactory implemented by this service. Returns an
// AudioOutputStream remote which is bound to AudioService in the browser and
// which pulls audio data for output.
BindTtsStreamFactory(pending_receiver<TtsStreamFactory> receiver,
pending_remote<audio.mojom.StreamFactory> stream_factory);
};
// Interface to create various types of tts streams.
//
// The remote is in a component extension; the receiver is the tts service
// utility process.
interface TtsStreamFactory {
// Obtains the tts stream used by the Google tts component extension.
CreateGoogleTtsStream() => (pending_remote<GoogleTtsStream> stream);
// Obtains the tts stream used by extensions that can perform audio generation
// and only require output services. Currently used only by the private Espeak
// tts engine extension.
CreatePlaybackTtsStream() => (
pending_remote<PlaybackTtsStream> stream,
int32 sample_rate,
int32 buffer_size);
};
// Interface for the Google component TTS engine to control
......@@ -41,7 +73,7 @@ interface TtsService {
//
// Note that the component extension may call Stop() early, if the TTS api
// wants to, for example, stop speech.
interface TtsStream {
interface GoogleTtsStream {
// Forward and install the |voice_name| encoded by |voice_bytes|.
InstallVoice(string voice_name, array<uint8> voice_bytes)
=> (bool success);
......@@ -66,8 +98,59 @@ interface TtsStream {
Resume();
};
// Returned to callers of TtsStream.speak(). It receives notable events
// pertaining to the text spoken.
// Interface for a tts engine to control the TtsService's production of audio
// for engines like Espeak, which send raw audio data.
//
// The remote is in a component extension; the receiver is the tts service
// utility process.
//
// Example usage:
// Play() - starts playback of an utterance like 'hello world'
// SendAudioBuffer(<first 1024 frames of audio>, 0, false)
// SendAudioBuffer(<1024 more frames of audio>, -1, false)
// ...
// SendAudioBuffer(<1024 more frames of audio>, 4, false)
// ...
// SendAudioBuffer(<1024 more frames of audio>, 11, true)
// Stop()
//
// Note: Espeak-ng is currently the only remote utilizing this interface. It is
// a private component extension to Chrome OS. Its sources can be found here:
// https://chromium.googlesource.com/chromiumos/third_party/espeak-ng
interface PlaybackTtsStream {
// Start playback of audio.
Play()
=> (pending_receiver<TtsEventObserver> event_observer);
// Send audio data to the tts service; expected to be called after Play,
// Resume and before Stop, Pause.
//
// |char_index| annotates the |frames_buffer| as the character index within
// the text being spoken. This is passed along in
// |TtsEventObserver.onTimepoint| at the moment when the buffer is being
// played.
//
// |last_buffer| indicates whether or not this is the last buffer of a
// particular tts utterance string.
SendAudioBuffer(
array<float> frames_buffer, int32 char_index, bool last_buffer);
// Stops on-going audio playback.
Stop();
// Sets volume of audio playback (0.0 to 1.0).
SetVolume(float volume);
// Pauses audio playback.
Pause();
// Resumes audio playback.
Resume();
};
// Returned to callers of GoogleTtsStream.speak() and
// PlaybackTtsStream.Play(). It receives notable events pertaining to the text
// spoken.
interface TtsEventObserver {
// GoogleTtsStream.Speak or PlaybackTtsStream.Play started speech playback.
OnStart();
......
......@@ -16,118 +16,66 @@
namespace chromeos {
namespace tts {
// Simple helper to bridge logging in the shared library to Chrome's logging.
void HandleLibraryLogging(int severity, const char* message) {
switch (severity) {
case logging::LOG_INFO:
// Suppressed.
break;
case logging::LOG_WARNING:
LOG(WARNING) << message;
break;
case logging::LOG_ERROR:
LOG(ERROR) << message;
break;
default:
break;
}
}
// TtsService is mostly glue code that adapts the TtsStream interface into a
// form needed by libchrometts.so. As is convention with shared objects, the
// lifetime of all arguments passed to the library is scoped to the function.
//
// To keep the library interface stable and prevent name mangling, all library
// methods utilize C features only.
namespace {
constexpr int kDefaultSampleRate = 22050;
constexpr int kDefaultBufferSize = 512;
} // namespace
TtsService::TtsService(mojo::PendingReceiver<mojom::TtsService> receiver)
: service_receiver_(this, std::move(receiver)), stream_receiver_(this) {
: service_receiver_(this, std::move(receiver)), tts_stream_factory_(this) {
if (setpriority(PRIO_PROCESS, 0, -10 /* real time audio */) != 0) {
PLOG(ERROR) << "Unable to request real time priority; performance will be "
"impacted.";
}
bool loaded = libchrometts_.Load(kLibchromettsPath);
if (!loaded) {
LOG(ERROR) << "Unable to load libchrometts.so.";
exit(0);
} else {
libchrometts_.GoogleTtsSetLogger(HandleLibraryLogging);
}
}
TtsService::~TtsService() = default;
void TtsService::BindTtsStream(
mojo::PendingReceiver<mojom::TtsStream> receiver,
void TtsService::BindTtsStreamFactory(
mojo::PendingReceiver<mojom::TtsStreamFactory> receiver,
mojo::PendingRemote<audio::mojom::StreamFactory> factory) {
stream_receiver_.Bind(std::move(receiver));
stream_receiver_.set_disconnect_handler(base::BindOnce([] {
// The remote which lives in component extension js has been disconnected
// due to destruction or error.
exit(0);
}));
tts_stream_factory_.Bind(std::move(receiver));
// TODO(accessibility): The sample rate below can change based on the audio
// data retrieved. Plumb this data through and re-create the output device if
// it changes.
media::AudioParameters params(
media::AudioParameters::AUDIO_PCM_LOW_LATENCY, media::CHANNEL_LAYOUT_MONO,
22050 /* sample rate */, libchrometts_.GoogleTtsGetFramesInAudioBuffer());
// TODO(accessibility): make it possible to change this dynamically.
media::AudioParameters params(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
media::CHANNEL_LAYOUT_MONO, kDefaultSampleRate,
kDefaultBufferSize);
output_device_ = std::make_unique<audio::OutputDevice>(
std::move(factory), params, this, std::string());
}
void TtsService::InstallVoice(const std::string& voice_name,
const std::vector<uint8_t>& voice_bytes,
InstallVoiceCallback callback) {
// Create a directory to place extracted voice data.
base::FilePath voice_data_path(kTempDataDirectory);
voice_data_path = voice_data_path.Append(voice_name);
if (base::DirectoryExists(voice_data_path)) {
std::move(callback).Run(true);
return;
}
if (!base::CreateDirectoryAndGetError(voice_data_path, nullptr)) {
std::move(callback).Run(false);
return;
}
std::move(callback).Run(libchrometts_.GoogleTtsInstallVoice(
voice_data_path.value().c_str(), (char*)&voice_bytes[0],
voice_bytes.size()));
// Creates a GoogleTtsStream bound to a fresh message pipe, stores it in
// |google_tts_stream_| (replacing any earlier one), and hands the remote end
// of the pipe to |callback|.
void TtsService::CreateGoogleTtsStream(CreateGoogleTtsStreamCallback callback) {
  mojo::PendingRemote<mojom::GoogleTtsStream> stream_remote;
  google_tts_stream_ = std::make_unique<GoogleTtsStream>(
      this, stream_remote.InitWithNewPipeAndPassReceiver());
  std::move(callback).Run(std::move(stream_remote));
}
void TtsService::SelectVoice(const std::string& voice_name,
SelectVoiceCallback callback) {
base::FilePath path_prefix =
base::FilePath(kTempDataDirectory).Append(voice_name);
base::FilePath pipeline_path = path_prefix.Append("pipeline");
std::move(callback).Run(libchrometts_.GoogleTtsInit(
pipeline_path.value().c_str(), path_prefix.value().c_str()));
// Creates a PlaybackTtsStream bound to a fresh message pipe, stores it in
// |playback_tts_stream_| (replacing any earlier one), and hands |callback|
// the remote end of the pipe along with the default sample rate and buffer
// size.
void TtsService::CreatePlaybackTtsStream(
    CreatePlaybackTtsStreamCallback callback) {
  mojo::PendingRemote<mojom::PlaybackTtsStream> stream_remote;
  playback_tts_stream_ = std::make_unique<PlaybackTtsStream>(
      this, stream_remote.InitWithNewPipeAndPassReceiver());
  std::move(callback).Run(std::move(stream_remote), kDefaultSampleRate,
                          kDefaultBufferSize);
}
void TtsService::Speak(const std::vector<uint8_t>& text_jspb,
SpeakCallback callback) {
void TtsService::Play(
base::OnceCallback<void(::mojo::PendingReceiver<mojom::TtsEventObserver>)>
callback) {
tts_event_observer_.reset();
auto pending_receiver = tts_event_observer_.BindNewPipeAndPassReceiver();
std::move(callback).Run(std::move(pending_receiver));
bool status = libchrometts_.GoogleTtsInitBuffered((char*)&text_jspb[0],
text_jspb.size());
if (!status) {
tts_event_observer_->OnError();
return;
}
output_device_->Play();
}
is_playing_ = true;
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
true /* is_first_buffer */));
// Appends |buf| to the queue of audio buffers awaiting playback. The queue is
// modified while holding |state_lock_|.
void TtsService::AddAudioBuffer(AudioBuffer buf) {
base::AutoLock al(state_lock_);
buffers_.emplace_back(std::move(buf));
}
void TtsService::Stop() {
......@@ -148,6 +96,13 @@ void TtsService::Resume() {
output_device_->Play();
}
// Exits the process when both a Google stream and a playback stream have been
// created and neither is still bound.
// NOTE(review): as written, this never exits if only one of the two stream
// types was ever created, because the null check on the other one fails.
// Confirm that this is intended.
void TtsService::MaybeExit() {
if (google_tts_stream_ && !google_tts_stream_->IsBound() &&
playback_tts_stream_ && !playback_tts_stream_->IsBound()) {
exit(0);
}
}
int TtsService::Render(base::TimeDelta delay,
base::TimeTicks delay_timestamp,
int prior_frames_skipped,
......@@ -164,7 +119,6 @@ int TtsService::Render(base::TimeDelta delay,
const AudioBuffer& buf = buffers_.front();
status = buf.status;
// Done, 0, or error, -1.
if (status <= 0) {
if (status == -1)
......@@ -201,48 +155,9 @@ int TtsService::Render(base::TimeDelta delay,
void TtsService::OnRenderError() {}
void TtsService::StopLocked(bool clear_buffers) {
if (!is_playing_)
return;
output_device_->Pause();
if (clear_buffers) {
if (clear_buffers)
buffers_.clear();
libchrometts_.GoogleTtsFinalizeBuffered();
}
is_playing_ = false;
}
void TtsService::ReadMoreFrames(bool is_first_buffer) {
if (!is_playing_)
return;
AudioBuffer buf;
buf.frames.resize(libchrometts_.GoogleTtsGetFramesInAudioBuffer());
size_t frames_in_buf = 0;
buf.status =
libchrometts_.GoogleTtsReadBuffered(&buf.frames[0], &frames_in_buf);
buf.frames.resize(frames_in_buf);
buf.char_index = -1;
if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
buf.char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
buf.is_first_buffer = is_first_buffer;
{
base::AutoLock al(state_lock_);
buffers_.emplace_back(std::move(buf));
}
if (buf.status <= 0)
return;
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
false /* is_first_buffer */));
}
TtsService::AudioBuffer::AudioBuffer() = default;
......
......@@ -7,6 +7,8 @@
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"
#include "chromeos/services/tts/google_tts_stream.h"
#include "chromeos/services/tts/playback_tts_stream.h"
#include "chromeos/services/tts/public/mojom/tts_service.mojom.h"
#include "library_loaders/libchrometts.h"
#include "media/base/audio_renderer_sink.h"
......@@ -21,30 +23,48 @@ namespace chromeos {
namespace tts {
class TtsService : public mojom::TtsService,
public mojom::TtsStream,
public mojom::TtsStreamFactory,
public media::AudioRendererSink::RenderCallback {
public:
// Helper group of state to pass from main thread to audio thread.
struct AudioBuffer {
AudioBuffer();
~AudioBuffer();
AudioBuffer(const AudioBuffer& other) = delete;
AudioBuffer(AudioBuffer&& other);
std::vector<float> frames;
int char_index = -1;
int status = 0;
bool is_first_buffer = false;
};
explicit TtsService(mojo::PendingReceiver<mojom::TtsService> receiver);
~TtsService() override;
// Audio operations.
void Play(
base::OnceCallback<void(::mojo::PendingReceiver<mojom::TtsEventObserver>)>
callback);
void AddAudioBuffer(AudioBuffer buf);
void Stop();
void SetVolume(float volume);
void Pause();
void Resume();
// Maybe exit this process.
void MaybeExit();
private:
// mojom::TtsService:
void BindTtsStream(
mojo::PendingReceiver<mojom::TtsStream> receiver,
void BindTtsStreamFactory(
mojo::PendingReceiver<mojom::TtsStreamFactory> receiver,
mojo::PendingRemote<audio::mojom::StreamFactory> factory) override;
// mojom::TtsStream:
void InstallVoice(const std::string& voice_name,
const std::vector<uint8_t>& voice_bytes,
InstallVoiceCallback callback) override;
void SelectVoice(const std::string& voice_name,
SelectVoiceCallback callback) override;
void Speak(const std::vector<uint8_t>& text_jspb,
SpeakCallback callback) override;
void Stop() override;
void SetVolume(float volume) override;
void Pause() override;
void Resume() override;
// mojom::TtsStreamFactory:
void CreateGoogleTtsStream(CreateGoogleTtsStreamCallback callback) override;
void CreatePlaybackTtsStream(
CreatePlaybackTtsStreamCallback callback) override;
// media::AudioRendererSink::RenderCallback:
int Render(base::TimeDelta delay,
......@@ -57,19 +77,18 @@ class TtsService : public mojom::TtsService,
void StopLocked(bool clear_buffers = true)
EXCLUSIVE_LOCKS_REQUIRED(state_lock_);
void ReadMoreFrames(bool is_first_buffer);
// Connection to tts in the browser.
mojo::Receiver<mojom::TtsService> service_receiver_;
// Protects access to state from main thread and audio thread.
base::Lock state_lock_;
// Factory creating various types of streams.
mojo::Receiver<mojom::TtsStreamFactory> tts_stream_factory_;
// Prebuilt.
LibChromeTtsLoader libchrometts_;
std::unique_ptr<GoogleTtsStream> google_tts_stream_;
// Connection to tts in the component extension.
mojo::Receiver<mojom::TtsStream> stream_receiver_;
std::unique_ptr<PlaybackTtsStream> playback_tts_stream_;
// Protects access to state from main thread and audio thread.
base::Lock state_lock_;
// Connection to send tts events to component extension.
mojo::Remote<mojom::TtsEventObserver> tts_event_observer_;
......@@ -77,24 +96,8 @@ class TtsService : public mojom::TtsService,
// Outputs speech synthesis to audio.
std::unique_ptr<audio::OutputDevice> output_device_;
// Helper group of state to pass from main thread to audio thread.
struct AudioBuffer {
AudioBuffer();
~AudioBuffer();
AudioBuffer(const AudioBuffer& other) = delete;
AudioBuffer(AudioBuffer&& other);
std::vector<float> frames;
int char_index;
int status;
bool is_first_buffer;
};
// The queue of audio buffers to be played by the audio thread.
std::deque<AudioBuffer> buffers_ GUARDED_BY(state_lock_);
// Tracks whether the output device is playing audio.
bool is_playing_ = false;
};
} // namespace tts
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment