Commit cbd15465 authored by Josh Imbriani, committed by Chromium LUCI CQ

Enable multi voices in ChromeOS TTS

Bug: 169173203
Change-Id: I6afeb402e91399a529916f041b16f0964e8f4e3b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2572776
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: David Tseng <dtseng@chromium.org>
Commit-Queue: Josh Imbriani <joshimbriani@google.com>
Cr-Commit-Position: refs/heads/master@{#836289}
parent 53cc6f5b
...@@ -18,7 +18,9 @@ bool GoogleTtsInstallVoice(const char* voice_name, ...@@ -18,7 +18,9 @@ bool GoogleTtsInstallVoice(const char* voice_name,
const uint8_t* voice_bytes, const uint8_t* voice_bytes,
int size); int size);
bool GoogleTtsInitBuffered(const uint8_t* text_jspb, int text_jspb_len); bool GoogleTtsInitBuffered(const uint8_t* text_jspb,
const char* speaker_name,
int text_jspb_len);
int GoogleTtsReadBuffered(float* audio_channel_buffer, size_t* frames_written); int GoogleTtsReadBuffered(float* audio_channel_buffer, size_t* frames_written);
......
...@@ -90,15 +90,16 @@ void GoogleTtsStream::SelectVoice(const std::string& voice_name, ...@@ -90,15 +90,16 @@ void GoogleTtsStream::SelectVoice(const std::string& voice_name,
SelectVoiceCallback callback) { SelectVoiceCallback callback) {
base::FilePath path_prefix = base::FilePath path_prefix =
base::FilePath(kTempDataDirectory).Append(voice_name); base::FilePath(kTempDataDirectory).Append(voice_name);
base::FilePath pipeline_path = path_prefix.Append("pipeline"); base::FilePath pipeline_path = path_prefix.Append("pipeline.pb");
std::move(callback).Run(libchrometts_.GoogleTtsInit( std::move(callback).Run(libchrometts_.GoogleTtsInit(
pipeline_path.value().c_str(), path_prefix.value().c_str())); pipeline_path.value().c_str(), path_prefix.value().c_str()));
} }
void GoogleTtsStream::Speak(const std::vector<uint8_t>& text_jspb, void GoogleTtsStream::Speak(const std::vector<uint8_t>& text_jspb,
const std::string& speaker_name,
SpeakCallback callback) { SpeakCallback callback) {
bool status = bool status = libchrometts_.GoogleTtsInitBuffered(
libchrometts_.GoogleTtsInitBuffered(&text_jspb[0], text_jspb.size()); &text_jspb[0], speaker_name.c_str(), text_jspb.size());
if (!status) { if (!status) {
stream_receiver_.reset(); stream_receiver_.reset();
owner_->MaybeExit(); owner_->MaybeExit();
......
...@@ -31,6 +31,7 @@ class GoogleTtsStream : public mojom::GoogleTtsStream { ...@@ -31,6 +31,7 @@ class GoogleTtsStream : public mojom::GoogleTtsStream {
void SelectVoice(const std::string& voice_name, void SelectVoice(const std::string& voice_name,
SelectVoiceCallback callback) override; SelectVoiceCallback callback) override;
void Speak(const std::vector<uint8_t>& text_jspb, void Speak(const std::vector<uint8_t>& text_jspb,
const std::string& speaker_name,
SpeakCallback callback) override; SpeakCallback callback) override;
void Stop() override; void Stop() override;
void SetVolume(float volume) override; void SetVolume(float volume) override;
......
...@@ -17,7 +17,7 @@ import "services/audio/public/mojom/stream_factory.mojom"; ...@@ -17,7 +17,7 @@ import "services/audio/public/mojom/stream_factory.mojom";
// //
// <-component extension<----------------- // <-component extension<-----------------
// / \ // / \
// [Google|Playback]Stream TtsEventObserver // [Google|Playback]TtsStream TtsEventObserver
// | | // | |
// ----------- ----------------- // ----------- -----------------
// \ / // \ /
...@@ -81,8 +81,10 @@ interface GoogleTtsStream { ...@@ -81,8 +81,10 @@ interface GoogleTtsStream {
// Selects a voice for streaming given a |voice_name|. // Selects a voice for streaming given a |voice_name|.
SelectVoice(string voice_name) => (bool success); SelectVoice(string voice_name) => (bool success);
// Speak text described by a serialized proto.speech.tts.Text proto. // Speak text described by a serialized proto.speech.tts.Text proto with the
Speak(array<uint8> text_jspb) // speaker given by speaker_name. The call will fail if no speaker is given
// and the voice model is a multi-speaker model.
Speak(array<uint8> text_jspb, string speaker_name)
=> (pending_receiver<TtsEventObserver> event_observer); => (pending_receiver<TtsEventObserver> event_observer);
// Stop speaking the currently speaking text, if any. // Stop speaking the currently speaking text, if any.
......
...@@ -17,7 +17,7 @@ namespace chromeos { ...@@ -17,7 +17,7 @@ namespace chromeos {
namespace tts { namespace tts {
namespace { namespace {
constexpr int kDefaultSampleRate = 22050; constexpr int kDefaultSampleRate = 24000;
constexpr int kDefaultBufferSize = 512; constexpr int kDefaultBufferSize = 512;
} // namespace } // namespace
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment