Commit cce93193 authored by Ryan Daum, committed by Commit Bot

[chromecast] Begin unforking cast TTS controller

  * Change the TTS extension API to use the upstream TTS controller now
    that it's been moved out of chrome/ and into content/.
  * Requires downstream changes in internal/ to rework the platform;
    until those changes land, TTS will become a no-op.
  * Using upstream TtsController instead of our fork automagically
    makes it possible to enable and use the Speech Synthesis API.

Bug: internal b/162974460
Merge-With: eureka-internal/434822
Test: manual, on device
Change-Id: I3acde313d177b74408394046a283b2da02dbff41
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2340703
Reviewed-by: Luke Halliwell (slow) <halliwell@chromium.org>
Reviewed-by: Sean Topping <seantopping@chromium.org>
Reviewed-by: Daniel Nicoara <dnicoara@chromium.org>
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Reviewed-by: Randy Rossi <rmrossi@chromium.org>
Commit-Queue: Ryan Daum <rdaum@chromium.org>
Cr-Commit-Position: refs/heads/master@{#796433}
parent af87171f
...@@ -127,13 +127,6 @@ cast_source_set("browser") { ...@@ -127,13 +127,6 @@ cast_source_set("browser") {
"service/cast_service_simple.h", "service/cast_service_simple.h",
"service_connector.cc", "service_connector.cc",
"service_connector.h", "service_connector.h",
"tts/tts_controller.h",
"tts/tts_controller_impl.cc",
"tts/tts_controller_impl.h",
"tts/tts_platform.cc",
"tts/tts_platform.h",
"tts/tts_platform_stub.cc",
"tts/tts_platform_stub.h",
"webui/cast_resource_data_source.cc", "webui/cast_resource_data_source.cc",
"webui/cast_resource_data_source.h", "webui/cast_resource_data_source.h",
"webui/cast_webui.cc", "webui/cast_webui.cc",
......
...@@ -65,7 +65,7 @@ include_rules = [ ...@@ -65,7 +65,7 @@ include_rules = [
"+third_party/blink/public/mojom/loader/resource_load_info.mojom.h", "+third_party/blink/public/mojom/loader/resource_load_info.mojom.h",
"+third_party/blink/public/mojom/mediastream/media_stream.mojom-shared.h", "+third_party/blink/public/mojom/mediastream/media_stream.mojom-shared.h",
"+third_party/blink/public/mojom/messaging", "+third_party/blink/public/mojom/messaging",
"+third_party/blink/public/mojom/speech/speech_synthesis.mojom-forward.h", "+third_party/blink/public/mojom/speech/speech_synthesis.mojom.h",
"+third_party/skia/include/core/SkColor.h", "+third_party/skia/include/core/SkColor.h",
"+ui/accessibility", "+ui/accessibility",
"+ui/aura", "+ui/aura",
......
...@@ -44,8 +44,6 @@ ...@@ -44,8 +44,6 @@
#include "chromecast/browser/media/media_caps_impl.h" #include "chromecast/browser/media/media_caps_impl.h"
#include "chromecast/browser/metrics/cast_browser_metrics.h" #include "chromecast/browser/metrics/cast_browser_metrics.h"
#include "chromecast/browser/service_connector.h" #include "chromecast/browser/service_connector.h"
#include "chromecast/browser/tts/tts_controller_impl.h"
#include "chromecast/browser/tts/tts_platform_stub.h"
#include "chromecast/chromecast_buildflags.h" #include "chromecast/chromecast_buildflags.h"
#include "chromecast/graphics/cast_window_manager.h" #include "chromecast/graphics/cast_window_manager.h"
#include "chromecast/media/base/key_systems_common.h" #include "chromecast/media/base/key_systems_common.h"
...@@ -626,9 +624,6 @@ void CastBrowserMainParts::PreMainMessageLoopRun() { ...@@ -626,9 +624,6 @@ void CastBrowserMainParts::PreMainMessageLoopRun() {
::media::InitializeMediaLibrary(); ::media::InitializeMediaLibrary();
media_caps_->Initialize(); media_caps_->Initialize();
cast_browser_process_->SetTtsController(std::make_unique<TtsControllerImpl>(
std::make_unique<TtsPlatformImplStub>()));
#if BUILDFLAG(ENABLE_CHROMECAST_EXTENSIONS) #if BUILDFLAG(ENABLE_CHROMECAST_EXTENSIONS)
user_pref_service_ = extensions::cast_prefs::CreateUserPrefService( user_pref_service_ = extensions::cast_prefs::CreateUserPrefService(
cast_browser_process_->browser_context()); cast_browser_process_->browser_context());
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
#include "chromecast/browser/cast_network_contexts.h" #include "chromecast/browser/cast_network_contexts.h"
#include "chromecast/browser/devtools/remote_debugging_server.h" #include "chromecast/browser/devtools/remote_debugging_server.h"
#include "chromecast/browser/metrics/cast_browser_metrics.h" #include "chromecast/browser/metrics/cast_browser_metrics.h"
#include "chromecast/browser/tts/tts_controller.h"
#include "chromecast/metrics/cast_metrics_service_client.h" #include "chromecast/metrics/cast_metrics_service_client.h"
#include "chromecast/net/connectivity_checker.h" #include "chromecast/net/connectivity_checker.h"
#include "chromecast/service/cast_service.h" #include "chromecast/service/cast_service.h"
...@@ -145,12 +144,6 @@ void CastBrowserProcess::SetNetLog(net::NetLog* net_log) { ...@@ -145,12 +144,6 @@ void CastBrowserProcess::SetNetLog(net::NetLog* net_log) {
net_log_ = net_log; net_log_ = net_log;
} }
void CastBrowserProcess::SetTtsController(
std::unique_ptr<TtsController> tts_controller) {
DCHECK(!tts_controller_);
tts_controller_ = std::move(tts_controller);
}
void CastBrowserProcess::SetWebViewFactory( void CastBrowserProcess::SetWebViewFactory(
CastWebViewFactory* web_view_factory) { CastWebViewFactory* web_view_factory) {
DCHECK(!web_view_factory_); DCHECK(!web_view_factory_);
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
#include "build/build_config.h" #include "build/build_config.h"
#include "chromecast/chromecast_buildflags.h" #include "chromecast/chromecast_buildflags.h"
class TtsController;
class PrefService; class PrefService;
namespace net { namespace net {
...@@ -76,7 +75,6 @@ class CastBrowserProcess { ...@@ -76,7 +75,6 @@ class CastBrowserProcess {
void SetConnectivityChecker( void SetConnectivityChecker(
scoped_refptr<ConnectivityChecker> connectivity_checker); scoped_refptr<ConnectivityChecker> connectivity_checker);
void SetNetLog(net::NetLog* net_log); void SetNetLog(net::NetLog* net_log);
void SetTtsController(std::unique_ptr<TtsController> tts_controller);
void SetWebViewFactory(CastWebViewFactory* web_view_factory); void SetWebViewFactory(CastWebViewFactory* web_view_factory);
CastContentBrowserClient* browser_client() const { CastContentBrowserClient* browser_client() const {
...@@ -108,7 +106,6 @@ class CastBrowserProcess { ...@@ -108,7 +106,6 @@ class CastBrowserProcess {
return remote_debugging_server_.get(); return remote_debugging_server_.get();
} }
net::NetLog* net_log() const { return net_log_; } net::NetLog* net_log() const { return net_log_; }
TtsController* tts_controller() const { return tts_controller_.get(); }
CastWebViewFactory* web_view_factory() const { return web_view_factory_; } CastWebViewFactory* web_view_factory() const { return web_view_factory_; }
private: private:
...@@ -132,7 +129,6 @@ class CastBrowserProcess { ...@@ -132,7 +129,6 @@ class CastBrowserProcess {
CastWebViewFactory* web_view_factory_; CastWebViewFactory* web_view_factory_;
CastContentBrowserClient* cast_content_browser_client_; CastContentBrowserClient* cast_content_browser_client_;
net::NetLog* net_log_; net::NetLog* net_log_;
std::unique_ptr<TtsController> tts_controller_;
// Note: CastService must be destroyed before others. // Note: CastService must be destroyed before others.
std::unique_ptr<CastService> cast_service_; std::unique_ptr<CastService> cast_service_;
......
...@@ -49,7 +49,6 @@ ...@@ -49,7 +49,6 @@
#include "chromecast/browser/media/media_caps_impl.h" #include "chromecast/browser/media/media_caps_impl.h"
#include "chromecast/browser/service/cast_service_simple.h" #include "chromecast/browser/service/cast_service_simple.h"
#include "chromecast/browser/service_connector.h" #include "chromecast/browser/service_connector.h"
#include "chromecast/browser/tts/tts_controller.h"
#include "chromecast/common/cast_content_client.h" #include "chromecast/common/cast_content_client.h"
#include "chromecast/common/global_descriptors.h" #include "chromecast/common/global_descriptors.h"
#include "chromecast/media/audio/cast_audio_manager.h" #include "chromecast/media/audio/cast_audio_manager.h"
......
...@@ -9,8 +9,7 @@ ...@@ -9,8 +9,7 @@
#include "chromecast/browser/extensions/api/tts/tts_extension_api.h" #include "chromecast/browser/extensions/api/tts/tts_extension_api.h"
#include <stddef.h> #include <cstddef>
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility> #include <utility>
...@@ -19,9 +18,10 @@ ...@@ -19,9 +18,10 @@
#include "base/values.h" #include "base/values.h"
#include "chromecast/browser/cast_browser_process.h" #include "chromecast/browser/cast_browser_process.h"
#include "chromecast/browser/extensions/api/tts/tts_extension_api_constants.h" #include "chromecast/browser/extensions/api/tts/tts_extension_api_constants.h"
#include "chromecast/browser/tts/tts_controller.h" #include "content/public/browser/tts_controller.h"
#include "extensions/browser/event_router.h" #include "extensions/browser/event_router.h"
#include "extensions/browser/extension_function_registry.h" #include "extensions/browser/extension_function_registry.h"
#include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h"
#include "ui/base/l10n/l10n_util.h" #include "ui/base/l10n/l10n_util.h"
namespace constants = tts_extension_api_constants; namespace constants = tts_extension_api_constants;
...@@ -30,27 +30,27 @@ namespace events { ...@@ -30,27 +30,27 @@ namespace events {
const char kOnEvent[] = "tts.onEvent"; const char kOnEvent[] = "tts.onEvent";
} // namespace events } // namespace events
const char* TtsEventTypeToString(TtsEventType event_type) { const char* TtsEventTypeToString(content::TtsEventType event_type) {
switch (event_type) { switch (event_type) {
case TTS_EVENT_START: case content::TTS_EVENT_START:
return constants::kEventTypeStart; return constants::kEventTypeStart;
case TTS_EVENT_END: case content::TTS_EVENT_END:
return constants::kEventTypeEnd; return constants::kEventTypeEnd;
case TTS_EVENT_WORD: case content::TTS_EVENT_WORD:
return constants::kEventTypeWord; return constants::kEventTypeWord;
case TTS_EVENT_SENTENCE: case content::TTS_EVENT_SENTENCE:
return constants::kEventTypeSentence; return constants::kEventTypeSentence;
case TTS_EVENT_MARKER: case content::TTS_EVENT_MARKER:
return constants::kEventTypeMarker; return constants::kEventTypeMarker;
case TTS_EVENT_INTERRUPTED: case content::TTS_EVENT_INTERRUPTED:
return constants::kEventTypeInterrupted; return constants::kEventTypeInterrupted;
case TTS_EVENT_CANCELLED: case content::TTS_EVENT_CANCELLED:
return constants::kEventTypeCancelled; return constants::kEventTypeCancelled;
case TTS_EVENT_ERROR: case content::TTS_EVENT_ERROR:
return constants::kEventTypeError; return constants::kEventTypeError;
case TTS_EVENT_PAUSE: case content::TTS_EVENT_PAUSE:
return constants::kEventTypePause; return constants::kEventTypePause;
case TTS_EVENT_RESUME: case content::TTS_EVENT_RESUME:
return constants::kEventTypeResume; return constants::kEventTypeResume;
default: default:
NOTREACHED(); NOTREACHED();
...@@ -58,49 +58,44 @@ const char* TtsEventTypeToString(TtsEventType event_type) { ...@@ -58,49 +58,44 @@ const char* TtsEventTypeToString(TtsEventType event_type) {
} }
} }
TtsEventType TtsEventTypeFromString(const std::string& str) { content::TtsEventType TtsEventTypeFromString(const std::string& str) {
if (str == constants::kEventTypeStart) if (str == constants::kEventTypeStart)
return TTS_EVENT_START; return content::TTS_EVENT_START;
if (str == constants::kEventTypeEnd) if (str == constants::kEventTypeEnd)
return TTS_EVENT_END; return content::TTS_EVENT_END;
if (str == constants::kEventTypeWord) if (str == constants::kEventTypeWord)
return TTS_EVENT_WORD; return content::TTS_EVENT_WORD;
if (str == constants::kEventTypeSentence) if (str == constants::kEventTypeSentence)
return TTS_EVENT_SENTENCE; return content::TTS_EVENT_SENTENCE;
if (str == constants::kEventTypeMarker) if (str == constants::kEventTypeMarker)
return TTS_EVENT_MARKER; return content::TTS_EVENT_MARKER;
if (str == constants::kEventTypeInterrupted) if (str == constants::kEventTypeInterrupted)
return TTS_EVENT_INTERRUPTED; return content::TTS_EVENT_INTERRUPTED;
if (str == constants::kEventTypeCancelled) if (str == constants::kEventTypeCancelled)
return TTS_EVENT_CANCELLED; return content::TTS_EVENT_CANCELLED;
if (str == constants::kEventTypeError) if (str == constants::kEventTypeError)
return TTS_EVENT_ERROR; return content::TTS_EVENT_ERROR;
if (str == constants::kEventTypePause) if (str == constants::kEventTypePause)
return TTS_EVENT_PAUSE; return content::TTS_EVENT_PAUSE;
if (str == constants::kEventTypeResume) if (str == constants::kEventTypeResume)
return TTS_EVENT_RESUME; return content::TTS_EVENT_RESUME;
NOTREACHED(); NOTREACHED();
return TTS_EVENT_ERROR; return content::TTS_EVENT_ERROR;
}
namespace {
TtsController* GetTtsController() {
return chromecast::shell::CastBrowserProcess::GetInstance()->tts_controller();
} }
} // namespace
namespace extensions { namespace extensions {
// One of these is constructed for each utterance, and deleted // One of these is constructed for each utterance, and deleted
// when the utterance gets any final event. // when the utterance gets any final event.
class TtsExtensionEventHandler : public UtteranceEventDelegate { class TtsExtensionEventHandler : public content::UtteranceEventDelegate {
public: public:
explicit TtsExtensionEventHandler(const std::string& src_extension_id); explicit TtsExtensionEventHandler(const std::string& src_extension_id);
void OnTtsEvent(Utterance* utterance, void OnTtsEvent(content::TtsUtterance* utterance,
TtsEventType event_type, content::TtsEventType event_type,
int char_index, int char_index,
int length,
const std::string& error_message) override; const std::string& error_message) override;
private: private:
...@@ -113,21 +108,22 @@ TtsExtensionEventHandler::TtsExtensionEventHandler( ...@@ -113,21 +108,22 @@ TtsExtensionEventHandler::TtsExtensionEventHandler(
const std::string& src_extension_id) const std::string& src_extension_id)
: src_extension_id_(src_extension_id) {} : src_extension_id_(src_extension_id) {}
void TtsExtensionEventHandler::OnTtsEvent(Utterance* utterance, void TtsExtensionEventHandler::OnTtsEvent(content::TtsUtterance* utterance,
TtsEventType event_type, content::TtsEventType event_type,
int char_index, int char_index,
int length,
const std::string& error_message) { const std::string& error_message) {
if (utterance->src_id() < 0) { if (utterance->GetSrcId() < 0) {
if (utterance->finished()) if (utterance->IsFinished())
delete this; delete this;
return; return;
} }
const std::set<TtsEventType>& desired_event_types = const std::set<content::TtsEventType>& desired_event_types =
utterance->desired_event_types(); utterance->GetDesiredEventTypes();
if (desired_event_types.size() > 0 && if (!desired_event_types.empty() &&
desired_event_types.find(event_type) == desired_event_types.end()) { desired_event_types.find(event_type) == desired_event_types.end()) {
if (utterance->finished()) if (utterance->IsFinished())
delete this; delete this;
return; return;
} }
...@@ -136,24 +132,26 @@ void TtsExtensionEventHandler::OnTtsEvent(Utterance* utterance, ...@@ -136,24 +132,26 @@ void TtsExtensionEventHandler::OnTtsEvent(Utterance* utterance,
std::unique_ptr<base::DictionaryValue> details(new base::DictionaryValue()); std::unique_ptr<base::DictionaryValue> details(new base::DictionaryValue());
if (char_index >= 0) if (char_index >= 0)
details->SetInteger(constants::kCharIndexKey, char_index); details->SetInteger(constants::kCharIndexKey, char_index);
if (length >= 0)
details->SetInteger(constants::kLengthKey, length);
details->SetString(constants::kEventTypeKey, event_type_string); details->SetString(constants::kEventTypeKey, event_type_string);
if (event_type == TTS_EVENT_ERROR) { if (event_type == content::TTS_EVENT_ERROR) {
details->SetString(constants::kErrorMessageKey, error_message); details->SetString(constants::kErrorMessageKey, error_message);
} }
details->SetInteger(constants::kSrcIdKey, utterance->src_id()); details->SetInteger(constants::kSrcIdKey, utterance->GetSrcId());
details->SetBoolean(constants::kIsFinalEventKey, utterance->finished()); details->SetBoolean(constants::kIsFinalEventKey, utterance->IsFinished());
std::unique_ptr<base::ListValue> arguments(new base::ListValue()); std::unique_ptr<base::ListValue> arguments(new base::ListValue());
arguments->Append(std::move(details)); arguments->Append(std::move(details));
auto event = std::make_unique<extensions::Event>( auto event = std::make_unique<extensions::Event>(
::extensions::events::TTS_ON_EVENT, ::events::kOnEvent, ::extensions::events::TTS_ON_EVENT, ::events::kOnEvent,
std::move(arguments), utterance->browser_context()); std::move(arguments), utterance->GetBrowserContext());
event->event_url = utterance->src_url(); event->event_url = utterance->GetSrcUrl();
extensions::EventRouter::Get(utterance->browser_context()) extensions::EventRouter::Get(utterance->GetBrowserContext())
->DispatchEventToExtension(src_extension_id_, std::move(event)); ->DispatchEventToExtension(src_extension_id_, std::move(event));
if (utterance->finished()) if (utterance->IsFinished())
delete this; delete this;
} }
...@@ -184,22 +182,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() { ...@@ -184,22 +182,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() {
return RespondNow(Error(constants::kErrorInvalidLang)); return RespondNow(Error(constants::kErrorInvalidLang));
} }
std::string gender_str; double rate = blink::mojom::kSpeechSynthesisDoublePrefNotSet;
TtsGenderType gender;
if (options->HasKey(constants::kGenderKey))
EXTENSION_FUNCTION_VALIDATE(
options->GetString(constants::kGenderKey, &gender_str));
if (gender_str == constants::kGenderMale) {
gender = TTS_GENDER_MALE;
} else if (gender_str == constants::kGenderFemale) {
gender = TTS_GENDER_FEMALE;
} else if (gender_str.empty()) {
gender = TTS_GENDER_NONE;
} else {
return RespondNow(Error(constants::kErrorInvalidGender));
}
double rate = 1.0;
if (options->HasKey(constants::kRateKey)) { if (options->HasKey(constants::kRateKey)) {
EXTENSION_FUNCTION_VALIDATE(options->GetDouble(constants::kRateKey, &rate)); EXTENSION_FUNCTION_VALIDATE(options->GetDouble(constants::kRateKey, &rate));
if (rate < 0.1 || rate > 10.0) { if (rate < 0.1 || rate > 10.0) {
...@@ -207,7 +190,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() { ...@@ -207,7 +190,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() {
} }
} }
double pitch = 1.0; double pitch = blink::mojom::kSpeechSynthesisDoublePrefNotSet;
if (options->HasKey(constants::kPitchKey)) { if (options->HasKey(constants::kPitchKey)) {
EXTENSION_FUNCTION_VALIDATE( EXTENSION_FUNCTION_VALIDATE(
options->GetDouble(constants::kPitchKey, &pitch)); options->GetDouble(constants::kPitchKey, &pitch));
...@@ -216,7 +199,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() { ...@@ -216,7 +199,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() {
} }
} }
double volume = 1.0; double volume = blink::mojom::kSpeechSynthesisDoublePrefNotSet;
if (options->HasKey(constants::kVolumeKey)) { if (options->HasKey(constants::kVolumeKey)) {
EXTENSION_FUNCTION_VALIDATE( EXTENSION_FUNCTION_VALIDATE(
options->GetDouble(constants::kVolumeKey, &volume)); options->GetDouble(constants::kVolumeKey, &volume));
...@@ -231,7 +214,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() { ...@@ -231,7 +214,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() {
options->GetBoolean(constants::kEnqueueKey, &can_enqueue)); options->GetBoolean(constants::kEnqueueKey, &can_enqueue));
} }
std::set<TtsEventType> required_event_types; std::set<content::TtsEventType> required_event_types;
if (options->HasKey(constants::kRequiredEventTypesKey)) { if (options->HasKey(constants::kRequiredEventTypesKey)) {
base::ListValue* list; base::ListValue* list;
EXTENSION_FUNCTION_VALIDATE( EXTENSION_FUNCTION_VALIDATE(
...@@ -243,7 +226,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() { ...@@ -243,7 +226,7 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() {
} }
} }
std::set<TtsEventType> desired_event_types; std::set<content::TtsEventType> desired_event_types;
if (options->HasKey(constants::kDesiredEventTypesKey)) { if (options->HasKey(constants::kDesiredEventTypesKey)) {
base::ListValue* list; base::ListValue* list;
EXTENSION_FUNCTION_VALIDATE( EXTENSION_FUNCTION_VALIDATE(
...@@ -271,71 +254,66 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() { ...@@ -271,71 +254,66 @@ ExtensionFunction::ResponseAction TtsSpeakFunction::Run() {
// send the success response to the callback now - this ensures that // send the success response to the callback now - this ensures that
// the callback response always arrives before events, which makes // the callback response always arrives before events, which makes
// the behavior more predictable and easier to write unit tests for too. // the behavior more predictable and easier to write unit tests for too.
Respond(NoArguments());
Respond(OneArgument(std::make_unique<base::Value>(true)));
std::unique_ptr<content::TtsUtterance> utterance =
Utterance* utterance = new Utterance(browser_context()); content::TtsUtterance::Create(browser_context());
utterance->set_text(text); utterance->SetText(text);
utterance->set_voice_name(voice_name); utterance->SetVoiceName(voice_name);
utterance->set_src_id(src_id); utterance->SetSrcId(src_id);
utterance->set_src_url(source_url()); utterance->SetSrcUrl(source_url());
utterance->set_lang(lang); utterance->SetLang(lang);
utterance->set_gender(gender); utterance->SetContinuousParameters(rate, pitch, volume);
utterance->set_continuous_parameters(rate, pitch, volume); utterance->SetCanEnqueue(can_enqueue);
utterance->set_can_enqueue(can_enqueue); utterance->SetRequiredEventTypes(required_event_types);
utterance->set_required_event_types(required_event_types); utterance->SetDesiredEventTypes(desired_event_types);
utterance->set_desired_event_types(desired_event_types); utterance->SetEngineId(voice_extension_id);
utterance->set_extension_id(voice_extension_id); utterance->SetOptions(options.get());
utterance->set_options(options.get()); utterance->SetEventDelegate(new TtsExtensionEventHandler(extension_id()));
utterance->set_event_delegate(new TtsExtensionEventHandler(extension_id()));
content::TtsController* controller = content::TtsController::GetInstance();
GetTtsController()->SpeakOrEnqueue(utterance); controller->SpeakOrEnqueue(std::move(utterance));
return did_respond() ? AlreadyResponded() : RespondLater(); return AlreadyResponded();
} }
ExtensionFunction::ResponseAction TtsStopSpeakingFunction::Run() { ExtensionFunction::ResponseAction TtsStopSpeakingFunction::Run() {
GetTtsController()->Stop(); content::TtsController::GetInstance()->Stop(source_url());
return RespondNow(NoArguments()); return RespondNow(NoArguments());
} }
ExtensionFunction::ResponseAction TtsPauseFunction::Run() { ExtensionFunction::ResponseAction TtsPauseFunction::Run() {
GetTtsController()->Pause(); content::TtsController::GetInstance()->Pause();
return RespondNow(NoArguments()); return RespondNow(NoArguments());
} }
ExtensionFunction::ResponseAction TtsResumeFunction::Run() { ExtensionFunction::ResponseAction TtsResumeFunction::Run() {
GetTtsController()->Resume(); content::TtsController::GetInstance()->Resume();
return RespondNow(NoArguments()); return RespondNow(NoArguments());
} }
ExtensionFunction::ResponseAction TtsIsSpeakingFunction::Run() { ExtensionFunction::ResponseAction TtsIsSpeakingFunction::Run() {
return RespondNow(OneArgument( return RespondNow(OneArgument(std::make_unique<base::Value>(
std::make_unique<base::Value>(GetTtsController()->IsSpeaking()))); content::TtsController::GetInstance()->IsSpeaking())));
} }
ExtensionFunction::ResponseAction TtsGetVoicesFunction::Run() { ExtensionFunction::ResponseAction TtsGetVoicesFunction::Run() {
std::vector<VoiceData> voices; std::vector<content::VoiceData> voices;
GetTtsController()->GetVoices(browser_context(), &voices); content::TtsController::GetInstance()->GetVoices(browser_context(), &voices);
auto result_voices = std::make_unique<base::ListValue>(); auto result_voices = std::make_unique<base::ListValue>();
for (size_t i = 0; i < voices.size(); ++i) { for (size_t i = 0; i < voices.size(); ++i) {
const VoiceData& voice = voices[i]; const content::VoiceData& voice = voices[i];
std::unique_ptr<base::DictionaryValue> result_voice( std::unique_ptr<base::DictionaryValue> result_voice(
new base::DictionaryValue()); new base::DictionaryValue());
result_voice->SetString(constants::kVoiceNameKey, voice.name); result_voice->SetString(constants::kVoiceNameKey, voice.name);
result_voice->SetBoolean(constants::kRemoteKey, voice.remote); result_voice->SetBoolean(constants::kRemoteKey, voice.remote);
if (!voice.lang.empty()) if (!voice.lang.empty())
result_voice->SetString(constants::kLangKey, voice.lang); result_voice->SetString(constants::kLangKey, voice.lang);
if (voice.gender == TTS_GENDER_MALE) if (!voice.engine_id.empty())
result_voice->SetString(constants::kGenderKey, constants::kGenderMale); result_voice->SetString(constants::kExtensionIdKey, voice.engine_id);
else if (voice.gender == TTS_GENDER_FEMALE)
result_voice->SetString(constants::kGenderKey, constants::kGenderFemale);
if (!voice.extension_id.empty())
result_voice->SetString(constants::kExtensionIdKey, voice.extension_id);
auto event_types = std::make_unique<base::ListValue>(); auto event_types = std::make_unique<base::ListValue>();
for (std::set<TtsEventType>::iterator iter = voice.events.begin(); for (auto iter = voice.events.begin(); iter != voice.events.end(); ++iter) {
iter != voice.events.end(); ++iter) {
const char* event_name_constant = TtsEventTypeToString(*iter); const char* event_name_constant = TtsEventTypeToString(*iter);
event_types->AppendString(event_name_constant); event_types->AppendString(event_name_constant);
} }
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#include <string> #include <string>
#include "chromecast/browser/tts/tts_controller.h" #include "content/public/browser/tts_controller.h"
#include "extensions/browser/browser_context_keyed_api_factory.h" #include "extensions/browser/browser_context_keyed_api_factory.h"
#include "extensions/browser/extension_function.h" #include "extensions/browser/extension_function.h"
...@@ -20,8 +20,8 @@ namespace content { ...@@ -20,8 +20,8 @@ namespace content {
class BrowserContext; class BrowserContext;
} }
const char* TtsEventTypeToString(TtsEventType event_type); const char* TtsEventTypeToString(content::TtsEventType event_type);
TtsEventType TtsEventTypeFromString(const std::string& str); content::TtsEventType TtsEventTypeFromString(const std::string& str);
namespace extensions { namespace extensions {
......
...@@ -2,16 +2,12 @@ ...@@ -2,16 +2,12 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
// PLEASE NOTE: this is a copy with modifications from
// /chrome/browser/speech/extension_api
// It is temporary until a refactoring to move the chrome TTS implementation up
// into components and extensions/components can be completed.
#include "chromecast/browser/extensions/api/tts/tts_extension_api_constants.h" #include "chromecast/browser/extensions/api/tts/tts_extension_api_constants.h"
namespace tts_extension_api_constants { namespace tts_extension_api_constants {
const char kCharIndexKey[] = "charIndex"; const char kCharIndexKey[] = "charIndex";
const char kLengthKey[] = "length";
const char kDesiredEventTypesKey[] = "desiredEventTypes"; const char kDesiredEventTypesKey[] = "desiredEventTypes";
const char kEnqueueKey[] = "enqueue"; const char kEnqueueKey[] = "enqueue";
const char kErrorMessageKey[] = "errorMessage"; const char kErrorMessageKey[] = "errorMessage";
...@@ -30,9 +26,6 @@ const char kSrcIdKey[] = "srcId"; ...@@ -30,9 +26,6 @@ const char kSrcIdKey[] = "srcId";
const char kVoiceNameKey[] = "voiceName"; const char kVoiceNameKey[] = "voiceName";
const char kVolumeKey[] = "volume"; const char kVolumeKey[] = "volume";
const char kGenderFemale[] = "female";
const char kGenderMale[] = "male";
const char kEventTypeCancelled[] = "cancelled"; const char kEventTypeCancelled[] = "cancelled";
const char kEventTypeEnd[] = "end"; const char kEventTypeEnd[] = "end";
const char kEventTypeError[] = "error"; const char kEventTypeError[] = "error";
...@@ -45,7 +38,6 @@ const char kEventTypeStart[] = "start"; ...@@ -45,7 +38,6 @@ const char kEventTypeStart[] = "start";
const char kEventTypeWord[] = "word"; const char kEventTypeWord[] = "word";
const char kErrorExtensionIdMismatch[] = "Extension id mismatch."; const char kErrorExtensionIdMismatch[] = "Extension id mismatch.";
const char kErrorInvalidGender[] = "Invalid gender.";
const char kErrorInvalidLang[] = "Invalid lang."; const char kErrorInvalidLang[] = "Invalid lang.";
const char kErrorInvalidPitch[] = "Invalid pitch."; const char kErrorInvalidPitch[] = "Invalid pitch.";
const char kErrorInvalidRate[] = "Invalid rate."; const char kErrorInvalidRate[] = "Invalid rate.";
......
...@@ -2,11 +2,6 @@ ...@@ -2,11 +2,6 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
// PLEASE NOTE: this is a copy with modifications from
// /chrome/browser/speech/extension_api
// It is temporary until a refactoring to move the chrome TTS implementation up
// into components and extensions/components can be completed.
#ifndef CHROMECAST_BROWSER_EXTENSIONS_API_TTS_TTS_EXTENSION_API_CONSTANTS_H_ #ifndef CHROMECAST_BROWSER_EXTENSIONS_API_TTS_TTS_EXTENSION_API_CONSTANTS_H_
#define CHROMECAST_BROWSER_EXTENSIONS_API_TTS_TTS_EXTENSION_API_CONSTANTS_H_ #define CHROMECAST_BROWSER_EXTENSIONS_API_TTS_TTS_EXTENSION_API_CONSTANTS_H_
...@@ -17,6 +12,7 @@ ...@@ -17,6 +12,7 @@
namespace tts_extension_api_constants { namespace tts_extension_api_constants {
extern const char kCharIndexKey[]; extern const char kCharIndexKey[];
extern const char kLengthKey[];
extern const char kDesiredEventTypesKey[]; extern const char kDesiredEventTypesKey[];
extern const char kEnqueueKey[]; extern const char kEnqueueKey[];
extern const char kErrorMessageKey[]; extern const char kErrorMessageKey[];
...@@ -35,9 +31,6 @@ extern const char kSrcIdKey[]; ...@@ -35,9 +31,6 @@ extern const char kSrcIdKey[];
extern const char kVoiceNameKey[]; extern const char kVoiceNameKey[];
extern const char kVolumeKey[]; extern const char kVolumeKey[];
extern const char kGenderFemale[];
extern const char kGenderMale[];
extern const char kEventTypeCancelled[]; extern const char kEventTypeCancelled[];
extern const char kEventTypeEnd[]; extern const char kEventTypeEnd[];
extern const char kEventTypeError[]; extern const char kEventTypeError[];
...@@ -50,7 +43,6 @@ extern const char kEventTypeStart[]; ...@@ -50,7 +43,6 @@ extern const char kEventTypeStart[];
extern const char kEventTypeWord[]; extern const char kEventTypeWord[];
extern const char kErrorExtensionIdMismatch[]; extern const char kErrorExtensionIdMismatch[];
extern const char kErrorInvalidGender[];
extern const char kErrorInvalidLang[]; extern const char kErrorInvalidLang[];
extern const char kErrorInvalidPitch[]; extern const char kErrorInvalidPitch[];
extern const char kErrorInvalidRate[]; extern const char kErrorInvalidRate[];
......
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMECAST_BROWSER_TTS_TTS_CONTROLLER_H_
#define CHROMECAST_BROWSER_TTS_TTS_CONTROLLER_H_
#include <memory>
#include <queue>
#include <set>
#include <string>
#include <vector>
#include "base/memory/singleton.h"
#include "base/memory/weak_ptr.h"
#include "url/gurl.h"
class Utterance;
class TtsPlatformImpl;
namespace base {
class Value;
} // namespace base
namespace content {
class BrowserContext;
} // namespace content
// Events sent back from the TTS engine indicating the progress of an
// utterance. END, INTERRUPTED, CANCELLED and ERROR are the events that
// IsFinalTtsEventType() reports as final.
enum TtsEventType {
TTS_EVENT_START,
TTS_EVENT_END,
TTS_EVENT_WORD,
TTS_EVENT_SENTENCE,
TTS_EVENT_MARKER,
TTS_EVENT_INTERRUPTED,
TTS_EVENT_CANCELLED,
TTS_EVENT_ERROR,
TTS_EVENT_PAUSE,
TTS_EVENT_RESUME
};
// Gender of a voice, or the gender requested for an utterance.
// TTS_GENDER_NONE means "unspecified".
enum TtsGenderType { TTS_GENDER_NONE, TTS_GENDER_MALE, TTS_GENDER_FEMALE };
// Returns true if this event type is one that indicates an utterance
// is finished and can be destroyed.
bool IsFinalTtsEventType(TtsEventType event_type);
// The continuous parameters that apply to a given utterance.
// The constructor sets each field to the "not set" sentinel
// (blink::mojom::kSpeechSynthesisDoublePrefNotSet).
struct UtteranceContinuousParameters {
UtteranceContinuousParameters();
// Speaking rate.
double rate;
// Voice pitch.
double pitch;
// Output volume.
double volume;
};
// Information about one voice.
struct VoiceData {
// Initializes |gender| to TTS_GENDER_NONE and |remote| / |native| to false.
VoiceData();
VoiceData(const VoiceData& other);
~VoiceData();
// Voice name; matched exactly against an utterance's requested voice name.
std::string name;
// Language tag of the voice, e.g. "en" or "fr-CA".
std::string lang;
TtsGenderType gender;
std::string extension_id; // Not used in cast.
// Event types this voice is able to send.
std::set<TtsEventType> events;
// If true, the synthesis engine is a remote network resource.
// It may be higher latency and may incur bandwidth costs.
bool remote;
// If true, this is implemented by this platform's subclass of
// TtsPlatformImpl. If false, this is implemented by an extension.
bool native;
std::string native_voice_identifier;
};
// Interface for objects that want to be told about progress events on an
// utterance.
class UtteranceEventDelegate {
 public:
  virtual ~UtteranceEventDelegate() = default;

  // Called for each event on |utterance|. |char_index| and |error_message|
  // are forwarded exactly as they were reported.
  virtual void OnTtsEvent(Utterance* utterance,
                          TtsEventType event_type,
                          int char_index,
                          const std::string& error_message) = 0;
};
// One speech utterance: the text to speak, its parsed options, and the
// bookkeeping needed to route engine events back to the requester.
class Utterance {
 public:
  // Constructs an utterance for |browser_context| and assigns it a fresh
  // id. Before speaking this utterance, its other parameters like text,
  // rate, pitch, etc. should all be set.
  explicit Utterance(content::BrowserContext* browser_context);
  ~Utterance();

  // Sends an event to the delegate, if one is set. A non-negative
  // |char_index| is remembered as the latest position. Final events (see
  // IsFinalTtsEventType()) mark the utterance as finished and detach the
  // delegate; the utterance itself is NOT deleted here.
  void OnTtsEvent(TtsEventType event_type,
                  int char_index,
                  const std::string& error_message);

  // Finish an utterance without sending an event to the delegate.
  void Finish();

  // Getters and setters for the text to speak and other speech options.
  void set_text(const std::string& text) { text_ = text; }
  const std::string& text() const { return text_; }

  // Stores a deep copy of |options|, which must not be null.
  void set_options(const base::Value* options);
  const base::Value* options() const { return options_.get(); }

  void set_src_id(int src_id) { src_id_ = src_id; }
  int src_id() const { return src_id_; }

  void set_src_url(const GURL& src_url) { src_url_ = src_url; }
  const GURL& src_url() const { return src_url_; }

  void set_voice_name(const std::string& voice_name) {
    voice_name_ = voice_name;
  }
  const std::string& voice_name() const { return voice_name_; }

  void set_lang(const std::string& lang) { lang_ = lang; }
  const std::string& lang() const { return lang_; }

  void set_gender(TtsGenderType gender) { gender_ = gender; }
  TtsGenderType gender() const { return gender_; }

  void set_continuous_parameters(const double rate,
                                 const double pitch,
                                 const double volume) {
    continuous_parameters_.rate = rate;
    continuous_parameters_.pitch = pitch;
    continuous_parameters_.volume = volume;
  }
  const UtteranceContinuousParameters& continuous_parameters() const {
    return continuous_parameters_;
  }

  void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
  bool can_enqueue() const { return can_enqueue_; }

  void set_required_event_types(const std::set<TtsEventType>& types) {
    required_event_types_ = types;
  }
  const std::set<TtsEventType>& required_event_types() const {
    return required_event_types_;
  }

  void set_desired_event_types(const std::set<TtsEventType>& types) {
    desired_event_types_ = types;
  }
  const std::set<TtsEventType>& desired_event_types() const {
    return desired_event_types_;
  }

  const std::string& extension_id() const { return extension_id_; }
  void set_extension_id(const std::string& extension_id) {
    extension_id_ = extension_id;
  }

  UtteranceEventDelegate* event_delegate() const { return event_delegate_; }
  void set_event_delegate(UtteranceEventDelegate* event_delegate) {
    event_delegate_ = event_delegate;
  }

  // Getters and setters for internal state.
  content::BrowserContext* browser_context() const { return browser_context_; }
  int id() const { return id_; }
  bool finished() const { return finished_; }

 private:
  // The BrowserContext that initiated this utterance.
  content::BrowserContext* browser_context_;

  // The extension ID of the extension providing TTS for this utterance, or
  // empty if native TTS is being used.
  std::string extension_id_;

  // The unique ID of this utterance, used to associate callback functions
  // with utterances.
  int id_;

  // The id of the next utterance, so we can associate requests with
  // responses.
  static int next_utterance_id_;

  // The text to speak.
  std::string text_;

  // The full options arg passed to tts.speak, which may include fields
  // other than the ones we explicitly parse, below.
  std::unique_ptr<base::Value> options_;

  // The source extension's ID of this utterance, so that it can associate
  // events with the appropriate callback.
  int src_id_ = -1;

  // The URL of the page where the source extension called speak.
  GURL src_url_;

  // The delegate to be called when an utterance event is fired. Defaults to
  // null so OnTtsEvent() never tests an indeterminate pointer when no
  // delegate was ever set.
  UtteranceEventDelegate* event_delegate_ = nullptr;

  // The parsed options.
  std::string voice_name_;
  std::string lang_;
  TtsGenderType gender_ = TTS_GENDER_NONE;
  UtteranceContinuousParameters continuous_parameters_;
  bool can_enqueue_ = false;
  std::set<TtsEventType> required_event_types_;
  std::set<TtsEventType> desired_event_types_;

  // The index of the current char being spoken.
  int char_index_ = 0;

  // True if this utterance received an event indicating it's done.
  bool finished_ = false;
};
// Interface for the object that manages text-to-speech for the TTS extension
// APIs, potentially maintaining a queue of pending utterances and keeping
// track of all state.
class TtsController {
public:
virtual ~TtsController() = default;
// Set the TTS platform implementation to use. Ownership of |platform_impl|
// is transferred to the controller.
virtual void SetPlatformImpl(
std::unique_ptr<TtsPlatformImpl> platform_impl) = 0;
// Returns true if we're currently speaking an utterance.
virtual bool IsSpeaking() = 0;
// Speak the given utterance. If the utterance's can_enqueue flag is true
// and another utterance is in progress, adds it to the end of the queue.
// Otherwise, interrupts any current utterance and speaks this one
// immediately.
// |utterance| is a heap-allocated object handed over as a raw pointer; the
// TtsControllerImpl in this directory deletes it once it is finished or
// cancelled, so callers must not use it afterwards.
virtual void SpeakOrEnqueue(Utterance* utterance) = 0;
// Stop all utterances and flush the queue. Implies leaving pause mode
// as well.
virtual void Stop() = 0;
// Pause the speech queue. Some engines may support pausing in the middle
// of an utterance.
virtual void Pause() = 0;
// Resume speaking.
virtual void Resume() = 0;
// Handle events received from the speech engine. Events are forwarded to
// the callback function, and in addition, completion and error events
// trigger finishing the current utterance and starting the next one, if
// any.
virtual void OnTtsEvent(int utterance_id,
TtsEventType event_type,
int char_index,
const std::string& error_message) = 0;
// Return a list of all available voices, including the native voice,
// if supported, and all voices registered by extensions.
// Results are written to |out_voices|.
virtual void GetVoices(content::BrowserContext* browser_context,
std::vector<VoiceData>* out_voices) = 0;
// For unit testing. Returns the number of utterances waiting in the queue.
virtual int QueueSize() = 0;
};
#endif // CHROMECAST_BROWSER_TTS_TTS_CONTROLLER_H_
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromecast/browser/tts/tts_controller_impl.h"
#include <stddef.h>
#include <string>
#include <vector>
#include "base/containers/queue.h"
#include "base/metrics/histogram_macros.h"
#include "base/metrics/user_metrics.h"
#include "base/values.h"
#include "build/build_config.h"
#include "chromecast/browser/tts/tts_platform.h"
#include "chromecast/chromecast_buildflags.h"
#include "third_party/blink/public/mojom/speech/speech_synthesis.mojom-forward.h"
#if BUILDFLAG(ENABLE_CHROMECAST_EXTENSIONS)
#include "extensions/browser/extensions_browser_client.h"
#endif
namespace {
// A value to be used to indicate that there is no char index available.
const int kInvalidCharIndex = -1;
// Reduces a language/region tag such as 'fr-FR' to its two-letter language
// part ('fr'). Tags shorter than five characters, or without a hyphen at
// index 2, are returned unchanged.
std::string TrimLanguageCode(const std::string& lang) {
  const bool has_region_suffix = lang.size() >= 5 && lang[2] == '-';
  return has_region_suffix ? lang.substr(0, 2) : lang;
}
// IMPORTANT!
// These values are written to logs. Do not renumber or delete
// existing items; add new entries to the end of the list.
//
// Histogram buckets for TTS events; TtsControllerImpl::OnTtsEvent() maps
// each TtsEventType onto the entry of the same meaning.
enum class UMATextToSpeechEvent {
START = 0,
END = 1,
WORD = 2,
SENTENCE = 3,
MARKER = 4,
INTERRUPTED = 5,
CANCELLED = 6,
SPEECH_ERROR = 7,
PAUSE = 8,
RESUME = 9,
// This must always be the last enum. It's okay for its value to
// increase, but none of the other enum values may change.
COUNT
};
} // namespace
// Returns true for the event types after which an utterance is done: END,
// INTERRUPTED, CANCELLED and ERROR. All other types return false.
bool IsFinalTtsEventType(TtsEventType event_type) {
  switch (event_type) {
    case TTS_EVENT_END:
    case TTS_EVENT_INTERRUPTED:
    case TTS_EVENT_CANCELLED:
    case TTS_EVENT_ERROR:
      return true;
    default:
      return false;
  }
}
//
// UtteranceContinuousParameters
//
UtteranceContinuousParameters::UtteranceContinuousParameters()
: rate(blink::mojom::kSpeechSynthesisDoublePrefNotSet),
pitch(blink::mojom::kSpeechSynthesisDoublePrefNotSet),
volume(blink::mojom::kSpeechSynthesisDoublePrefNotSet) {}
//
// VoiceData
//
VoiceData::VoiceData()
: gender(TTS_GENDER_NONE), remote(false), native(false) {}
VoiceData::VoiceData(const VoiceData& other) = default;
VoiceData::~VoiceData() {}
//
// Utterance
//
// static
int Utterance::next_utterance_id_ = 0;

// Creates an utterance owned by |browser_context| (may be null in unit
// tests) and gives it the next sequential id. |options_| starts as an empty
// dictionary so options() never returns null.
Utterance::Utterance(content::BrowserContext* browser_context)
    : browser_context_(browser_context),
      id_(next_utterance_id_++),
      src_id_(-1),
      // Must start null: OnTtsEvent() tests this pointer before every
      // dispatch, and a delegate is only attached via set_event_delegate().
      event_delegate_(nullptr),
      gender_(TTS_GENDER_NONE),
      can_enqueue_(false),
      char_index_(0),
      finished_(false) {
  options_.reset(new base::DictionaryValue());
}
Utterance::~Utterance() {
  // Destroying an unfinished utterance is a bug. The one exception is a
  // unit test, recognizable by its null |browser_context_|.
  DCHECK(!browser_context_ || finished_);
}
// Records progress for this utterance and forwards the event to the
// delegate, if any. Once the utterance is finished the delegate is detached
// so that it receives no further events.
void Utterance::OnTtsEvent(TtsEventType event_type,
                           int char_index,
                           const std::string& error_message) {
  // Negative indices leave the remembered position untouched.
  if (char_index >= 0) {
    char_index_ = char_index;
  }

  const bool is_final_event = IsFinalTtsEventType(event_type);
  if (is_final_event) {
    finished_ = true;
  }

  // NOTE(review): the raw |char_index| is forwarded, not |char_index_|, even
  // though the declaration's comment talks about using the last good value
  // -- confirm which one delegates are meant to see.
  if (event_delegate_) {
    event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
  }

  if (finished_) {
    event_delegate_ = nullptr;
  }
}
// Marks the utterance as done without notifying the delegate.
void Utterance::Finish() {
  finished_ = true;
}
// Replaces the stored options with a deep copy of |options|. |options| is
// dereferenced unconditionally, so it must not be null.
void Utterance::set_options(const base::Value* options) {
  base::Value* const options_copy = options->DeepCopy();
  options_.reset(options_copy);
}
//
// TtsControllerImpl
//
TtsControllerImpl::TtsControllerImpl(
std::unique_ptr<TtsPlatformImpl> platform_impl)
: current_utterance_(nullptr),
paused_(false),
platform_impl_(std::move(platform_impl)) {}
// Releases the in-flight utterance and everything still queued, without
// sending any events.
TtsControllerImpl::~TtsControllerImpl() {
  if (Utterance* const in_flight = current_utterance_) {
    // Mark finished first so the Utterance destructor's DCHECK holds.
    in_flight->Finish();
    delete in_flight;
  }

  // Drop the queued utterances silently as well.
  ClearUtteranceQueue(false);
}
// Either queues |utterance| behind the current speech or interrupts
// everything and speaks it right away, depending on the paused state and
// the utterance's can_enqueue flag.
void TtsControllerImpl::SpeakOrEnqueue(Utterance* utterance) {
  const bool may_wait = utterance->can_enqueue();

  // Paused + not enqueueable: flush the queue (including this utterance,
  // which is pushed first so Stop() cancels and deletes it too) but remain
  // paused afterwards.
  if (paused_ && !may_wait) {
    utterance_queue_.push(utterance);
    Stop();
    paused_ = true;
    return;
  }

  const bool should_queue = paused_ || (IsSpeaking() && may_wait);
  if (should_queue) {
    utterance_queue_.push(utterance);
    return;
  }

  Stop();
  SpeakNow(utterance);
}
// Starts speaking |utterance| immediately: picks a voice, fills in default
// rate/pitch/volume, records metrics, then hands the utterance to the
// platform implementation (native voices) or marks it current for an
// extension voice. On the paths below the utterance either becomes
// |current_utterance_| or is deleted before returning.
// NOTE(review): GetPlatformImpl() is dereferenced without a null check here,
// unlike in GetVoices().
void TtsControllerImpl::SpeakNow(Utterance* utterance) {
// Get all available voices and try to find a matching voice.
std::vector<VoiceData> voices;
GetVoices(utterance->browser_context(), &voices);
// Get the best matching voice. If nothing matches, just set "native"
// to true because that might trigger deferred loading of native voices.
int index = GetMatchingVoice(utterance, voices);
VoiceData voice;
if (index >= 0)
voice = voices[index];
else
voice.native = true;
// Replace any "not set" rate/pitch/volume with defaults before the platform
// and the histograms below look at them.
UpdateUtteranceDefaults(utterance);
GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
base::RecordAction(base::UserMetricsAction("TextToSpeech.Speak"));
UMA_HISTOGRAM_COUNTS_100000("TextToSpeech.Utterance.TextLength",
utterance->text().size());
UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.FromExtensionAPI",
!utterance->src_url().is_empty());
UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.HasVoiceName",
!utterance->voice_name().empty());
UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.HasLang",
!utterance->lang().empty());
UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.HasGender",
utterance->gender() != TTS_GENDER_NONE);
UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.HasRate",
utterance->continuous_parameters().rate != 1.0);
UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.HasPitch",
utterance->continuous_parameters().pitch != 1.0);
UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.HasVolume",
utterance->continuous_parameters().volume != 1.0);
UMA_HISTOGRAM_BOOLEAN("TextToSpeech.Utterance.Native", voice.native);
if (!voice.native) {
// NOTE(review): when OS_ANDROID is defined this branch is empty, so a
// non-native voice leaves |utterance| neither current nor deleted -- looks
// like a leak; confirm whether non-native voices can occur on Android.
#if !defined(OS_ANDROID)
DCHECK(!voice.extension_id.empty());
current_utterance_ = utterance;
utterance->set_extension_id(voice.extension_id);
bool sends_end_event =
voice.events.find(TTS_EVENT_END) != voice.events.end();
// A voice that never reports END could not signal completion, so the
// utterance is finished right away and the queue moves on.
if (!sends_end_event) {
utterance->Finish();
delete utterance;
current_utterance_ = nullptr;
SpeakNextUtterance();
}
#endif
} else {
// It's possible for certain platforms to send start events immediately
// during |speak|.
current_utterance_ = utterance;
GetPlatformImpl()->clear_error();
bool success = GetPlatformImpl()->Speak(utterance->id(), utterance->text(),
utterance->lang(), voice,
utterance->continuous_parameters());
if (!success) {
// Report the platform's error to the delegate, then discard the
// utterance. |current_utterance_| is cleared first so the controller no
// longer refers to the object being deleted.
current_utterance_ = nullptr;
utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
GetPlatformImpl()->error());
delete utterance;
return;
}
}
}
// Halts speech: leaves pause mode, stops the platform engine, interrupts
// the current utterance and cancels everything that is queued.
void TtsControllerImpl::Stop() {
  base::RecordAction(base::UserMetricsAction("TextToSpeech.Stop"));
  paused_ = false;

  GetPlatformImpl()->clear_error();
  GetPlatformImpl()->StopSpeaking();

  if (current_utterance_) {
    const std::string no_error;
    current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
                                   no_error);
  }
  FinishCurrentUtterance();

  // Queued utterances each receive a CANCELLED event.
  ClearUtteranceQueue(true);
}
// Enters pause mode. The platform engine is only asked to pause when an
// utterance is currently in flight.
void TtsControllerImpl::Pause() {
  base::RecordAction(base::UserMetricsAction("TextToSpeech.Pause"));
  paused_ = true;

  if (!current_utterance_)
    return;

  GetPlatformImpl()->clear_error();
  GetPlatformImpl()->Pause();
}
// Leaves pause mode. With nothing in flight the next queued utterance is
// started; otherwise the platform engine is asked to resume the current
// one.
void TtsControllerImpl::Resume() {
  base::RecordAction(base::UserMetricsAction("TextToSpeech.Resume"));
  paused_ = false;

  if (!current_utterance_) {
    SpeakNextUtterance();
    return;
  }

  GetPlatformImpl()->clear_error();
  GetPlatformImpl()->Resume();
}
// Handles an event reported by the speech engine for |utterance_id|:
// records it in the "TextToSpeech.Event" histogram, forwards it to the
// current utterance and, if that finished the utterance, moves on to the
// next queued one.
void TtsControllerImpl::OnTtsEvent(int utterance_id,
                                   TtsEventType event_type,
                                   int char_index,
                                   const std::string& error_message) {
  // Completion callbacks can arrive "late", after the utterance was already
  // finished (another utterance interrupted it, or Stop() was called). That
  // is normal; such events are simply dropped.
  if (!current_utterance_)
    return;
  if (current_utterance_->id() != utterance_id)
    return;

  // Translate the event type into its histogram bucket.
  UMATextToSpeechEvent metric;
  switch (event_type) {
    case TTS_EVENT_START:
      metric = UMATextToSpeechEvent::START;
      break;
    case TTS_EVENT_END:
      metric = UMATextToSpeechEvent::END;
      break;
    case TTS_EVENT_WORD:
      metric = UMATextToSpeechEvent::WORD;
      break;
    case TTS_EVENT_SENTENCE:
      metric = UMATextToSpeechEvent::SENTENCE;
      break;
    case TTS_EVENT_MARKER:
      metric = UMATextToSpeechEvent::MARKER;
      break;
    case TTS_EVENT_INTERRUPTED:
      metric = UMATextToSpeechEvent::INTERRUPTED;
      break;
    case TTS_EVENT_CANCELLED:
      metric = UMATextToSpeechEvent::CANCELLED;
      break;
    case TTS_EVENT_ERROR:
      metric = UMATextToSpeechEvent::SPEECH_ERROR;
      break;
    case TTS_EVENT_PAUSE:
      metric = UMATextToSpeechEvent::PAUSE;
      break;
    case TTS_EVENT_RESUME:
      metric = UMATextToSpeechEvent::RESUME;
      break;
    default:
      NOTREACHED();
      return;
  }
  UMA_HISTOGRAM_ENUMERATION("TextToSpeech.Event", metric,
                            UMATextToSpeechEvent::COUNT);

  current_utterance_->OnTtsEvent(event_type, char_index, error_message);

  const bool utterance_done = current_utterance_->finished();
  if (utterance_done) {
    FinishCurrentUtterance();
    SpeakNextUtterance();
  }
}
// Appends the platform's voices to |out_voices|. Nothing is appended when
// there is no platform implementation or it reports itself unavailable.
// |browser_context| is not consulted by this implementation.
void TtsControllerImpl::GetVoices(content::BrowserContext* browser_context,
                                  std::vector<VoiceData>* out_voices) {
  TtsPlatformImpl* const platform = GetPlatformImpl();
  if (!platform)
    return;

  // Ensure all built-in voices are loaded. This is a no-op if they already
  // are.
  if (platform->PlatformImplAvailable())
    platform->GetVoices(out_voices);
}
// True while an utterance is in flight, or while the platform engine says
// it is still producing speech. The platform is only queried when no
// utterance is in flight.
bool TtsControllerImpl::IsSpeaking() {
  if (current_utterance_ != nullptr)
    return true;
  return GetPlatformImpl()->IsSpeaking();
}
// Deletes the in-flight utterance, if any. An utterance that has not
// finished yet is first sent an INTERRUPTED event.
void TtsControllerImpl::FinishCurrentUtterance() {
  if (!current_utterance_)
    return;

  if (!current_utterance_->finished()) {
    const std::string no_error;
    current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
                                   no_error);
  }

  delete current_utterance_;
  current_utterance_ = nullptr;
}
void TtsControllerImpl::SpeakNextUtterance() {
if (paused_)
return;
// Start speaking the next utterance in the queue. Keep trying in case
// one fails but there are still more in the queue to try.
while (!utterance_queue_.empty() && !current_utterance_) {
Utterance* utterance = utterance_queue_.front();
utterance_queue_.pop();
SpeakNow(utterance);
}
}
void TtsControllerImpl::ClearUtteranceQueue(bool send_events) {
while (!utterance_queue_.empty()) {
Utterance* utterance = utterance_queue_.front();
utterance_queue_.pop();
if (send_events)
utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
std::string());
else
utterance->Finish();
delete utterance;
}
}
// Replaces the platform implementation; the previous one (if any) is
// destroyed by the unique_ptr assignment.
void TtsControllerImpl::SetPlatformImpl(
    std::unique_ptr<TtsPlatformImpl> platform_impl) {
  platform_impl_ = std::move(platform_impl);
}
// Number of utterances waiting in the queue (the in-flight one is not
// counted).
int TtsControllerImpl::QueueSize() {
  const size_t pending_count = utterance_queue_.size();
  return static_cast<int>(pending_count);
}
// Non-owning pointer to the platform implementation; null if none is set.
TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {
  TtsPlatformImpl* const platform = platform_impl_.get();
  return platform;
}
// Returns the locale used as the fallback language preference when matching
// voices: the extensions browser client's application locale when Cast
// extensions are built in, otherwise a fixed "en-US".
std::string TtsControllerImpl::GetApplicationLocale() const {
  // TODO(rdaum): Delegate back to the platform's mechanism here.
  // For Chrome, use g_browser_process as before. For cast, it's the system
  // locale. Hardcoded for now.
#if BUILDFLAG(ENABLE_CHROMECAST_EXTENSIONS)
  return extensions::ExtensionsBrowserClient::Get()->GetApplicationLocale();
#else
  return "en-US";
#endif
}
int TtsControllerImpl::GetMatchingVoice(const Utterance* utterance,
std::vector<VoiceData>& voices) {
// Return the index of the voice that best match the utterance parameters.
//
// These criteria are considered mandatory - if they're specified, any voice
// that doesn't match is rejected.
//
// Extension ID
// Voice name
//
// The other criteria are scored based on how well they match, in
// this order of precedence:
//
// Utterange language (exact region preferred, then general language)
// App/system language (exact region preferred, then general language)
// Required event types
// Gender
std::string app_lang = GetApplicationLocale();
// Start with a best score of -1, that way even if none of the criteria
// match, something will be returned if there are any voices.
int best_score = -1;
int best_score_index = -1;
for (size_t i = 0; i < voices.size(); ++i) {
const VoiceData& voice = voices[i];
int score = 0;
// If the extension ID is specified, check for an exact match.
if (!utterance->extension_id().empty() &&
utterance->extension_id() != voice.extension_id)
continue;
// If the voice name is specified, check for an exact match.
if (!utterance->voice_name().empty() &&
voice.name != utterance->voice_name())
continue;
// Prefer the utterance language.
if (!voice.lang.empty() && !utterance->lang().empty()) {
// An exact language match is worth more than a partial match.
if (voice.lang == utterance->lang()) {
score += 32;
} else if (TrimLanguageCode(voice.lang) ==
TrimLanguageCode(utterance->lang())) {
score += 16;
}
}
// Prefer the system language after that.
if (!voice.lang.empty()) {
if (voice.lang == app_lang)
score += 8;
else if (TrimLanguageCode(voice.lang) == TrimLanguageCode(app_lang))
score += 4;
}
// Next, prefer required event types.
if (utterance->required_event_types().size() > 0) {
bool has_all_required_event_types = true;
for (std::set<TtsEventType>::const_iterator iter =
utterance->required_event_types().begin();
iter != utterance->required_event_types().end(); ++iter) {
if (voice.events.find(*iter) == voice.events.end()) {
has_all_required_event_types = false;
break;
}
}
if (has_all_required_event_types)
score += 2;
}
// Finally prefer the requested gender last.
if (voice.gender != TTS_GENDER_NONE &&
utterance->gender() != TTS_GENDER_NONE &&
voice.gender == utterance->gender()) {
score += 1;
}
if (score > best_score) {
best_score = score;
best_score_index = i;
}
}
return best_score_index;
}
// Replaces any of rate, pitch and volume that still hold the "not set"
// sentinel with the corresponding speech-synthesis default. Values that
// were set explicitly are written back unchanged.
void TtsControllerImpl::UpdateUtteranceDefaults(Utterance* utterance) {
  const UtteranceContinuousParameters& current =
      utterance->continuous_parameters();
  const double not_set = blink::mojom::kSpeechSynthesisDoublePrefNotSet;

  const double rate = (current.rate == not_set)
                          ? blink::mojom::kSpeechSynthesisDefaultRate
                          : current.rate;
  const double pitch = (current.pitch == not_set)
                           ? blink::mojom::kSpeechSynthesisDefaultPitch
                           : current.pitch;
  const double volume = (current.volume == not_set)
                            ? blink::mojom::kSpeechSynthesisDefaultVolume
                            : current.volume;

  utterance->set_continuous_parameters(rate, pitch, volume);
}
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMECAST_BROWSER_TTS_TTS_CONTROLLER_IMPL_H_
#define CHROMECAST_BROWSER_TTS_TTS_CONTROLLER_IMPL_H_
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "base/containers/queue.h"
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "base/memory/singleton.h"
#include "chromecast/browser/tts/tts_controller.h"
#include "url/gurl.h"
namespace content {
class BrowserContext;
} // namespace content
// Concrete TtsController that manages text-to-speech for the TTS and TTS
// engine extension APIs, maintaining a queue of pending utterances and
// keeping track of all state. Instances are created through the public
// constructor below, which takes the platform implementation to drive.
class TtsControllerImpl : public TtsController {
public:
// Takes ownership of |platform_impl|.
explicit TtsControllerImpl(std::unique_ptr<TtsPlatformImpl> platform_impl);
~TtsControllerImpl() override;
// TtsController methods
bool IsSpeaking() override;
void SpeakOrEnqueue(Utterance* utterance) override;
void Stop() override;
void Pause() override;
void Resume() override;
void OnTtsEvent(int utterance_id,
TtsEventType event_type,
int char_index,
const std::string& error_message) override;
void GetVoices(content::BrowserContext* browser_context,
std::vector<VoiceData>* out_voices) override;
void SetPlatformImpl(std::unique_ptr<TtsPlatformImpl> platform_impl) override;
int QueueSize() override;
// Returns the locale used as the app/system language when ranking voices.
std::string GetApplicationLocale() const;
private:
FRIEND_TEST_ALL_PREFIXES(TtsControllerTest, TestGetMatchingVoice);
FRIEND_TEST_ALL_PREFIXES(TtsControllerTest,
TestTtsControllerUtteranceDefaults);
// Get the platform TTS implementation (or injected mock).
TtsPlatformImpl* GetPlatformImpl();
// Start speaking the given utterance. Will either take ownership of
// |utterance| or delete it if there's an error. There is no return value;
// after a failure |current_utterance_| is simply left null.
void SpeakNow(Utterance* utterance);
// Clear the utterance queue. If send_events is true, will send
// TTS_EVENT_CANCELLED events on each one.
void ClearUtteranceQueue(bool send_events);
// Finalize and delete the current utterance.
void FinishCurrentUtterance();
// Start speaking the next utterance in the queue.
void SpeakNextUtterance();
// Given an utterance and a vector of voices, return the
// index of the voice that best matches the utterance, or -1 if none does.
int GetMatchingVoice(const Utterance* utterance,
std::vector<VoiceData>& voices);
// Updates the utterance to have default values for rate, pitch, and
// volume if they have not yet been set. On Chrome OS, defaults are
// pulled from user prefs, and may not be the same as other platforms.
void UpdateUtteranceDefaults(Utterance* utterance);
// The current utterance being spoken. Held as a raw pointer and deleted by
// this class; null when nothing is in flight.
Utterance* current_utterance_;
// Whether the queue is paused or not.
bool paused_;
// A queue of utterances to speak after the current one finishes. The
// queued pointers are deleted by this class.
base::queue<Utterance*> utterance_queue_;
// A pointer to the platform implementation of text-to-speech.
std::unique_ptr<TtsPlatformImpl> platform_impl_;
DISALLOW_COPY_AND_ASSIGN(TtsControllerImpl);
};
#endif // CHROMECAST_BROWSER_TTS_TTS_CONTROLLER_IMPL_H_
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Unit tests for the TTS Controller.
#include "base/values.h"
#include "chromecast/browser/tts/tts_controller_impl.h"
#include "chromecast/browser/tts/tts_platform.h"
#include "components/prefs/pref_registry_simple.h"
#include "components/prefs/testing_pref_service.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/platform/web_speech_synthesis_constants.h"
namespace chromecast {
// Fixture for the TTS controller tests; adds nothing to testing::Test.
class TtsControllerTest : public testing::Test {
};
// TtsPlatformImpl test double: claims to be available, accepts every
// request, never reports speech in progress, exposes no voices and ignores
// all error bookkeeping.
class DummyTtsPlatformImpl : public TtsPlatformImpl {
 public:
  DummyTtsPlatformImpl() = default;
  ~DummyTtsPlatformImpl() override = default;

  bool PlatformImplAvailable() override { return true; }

  // Pretends the utterance was handed to an engine successfully.
  bool Speak(int utterance_id,
             const std::string& utterance,
             const std::string& lang,
             const VoiceData& voice,
             const UtteranceContinuousParameters& params) override {
    return true;
  }

  bool IsSpeaking() override { return false; }
  bool StopSpeaking() override { return true; }
  void Pause() override {}
  void Resume() override {}

  // Leaves |out_voices| untouched.
  void GetVoices(std::vector<VoiceData>* out_voices) override {}

  // Error state is never stored: error() is always empty.
  std::string error() override { return std::string(); }
  void clear_error() override {}
  void set_error(const std::string& error) override {}
};
// Subclass of TtsController with a public ctor and dtor.
class TestableTtsController : public TtsControllerImpl {
public:
TestableTtsController() {}
~TestableTtsController() override {}
};
// Destroying a controller that still has an in-flight and a queued
// utterance must not crash.
TEST_F(TtsControllerTest, TestTtsControllerShutdown) {
  // The controller owns its platform implementation (SetPlatformImpl and the
  // constructor both take a std::unique_ptr), so the dummy platform is
  // heap-allocated and handed over rather than living on the stack.
  std::unique_ptr<TtsControllerImpl> controller =
      std::make_unique<TtsControllerImpl>(
          std::make_unique<DummyTtsPlatformImpl>());

  // The controller takes ownership of both utterances. A null browser
  // context marks them as test utterances.
  Utterance* utterance1 = new Utterance(nullptr);
  utterance1->set_can_enqueue(true);
  utterance1->set_src_id(1);
  controller->SpeakOrEnqueue(utterance1);

  Utterance* utterance2 = new Utterance(nullptr);
  utterance2->set_can_enqueue(true);
  utterance2->set_src_id(2);
  controller->SpeakOrEnqueue(utterance2);

  // Make sure that deleting the controller when there are pending
  // utterances doesn't cause a crash.
  controller.reset();
}
// Exercises the voice-ranking rules of TtsControllerImpl::GetMatchingVoice.
//
// NOTE(review): GetMatchingVoice is private and reached through
// FRIEND_TEST_ALL_PREFIXES in TtsControllerImpl, which lives in the global
// namespace, while this test is defined inside namespace chromecast --
// confirm the friend declaration actually names this test class.
TEST_F(TtsControllerTest, TestGetMatchingVoice) {
  // TtsControllerImpl has no GetInstance(); build a private instance backed
  // by the do-nothing platform instead.
  std::unique_ptr<TtsControllerImpl> tts_controller =
      std::make_unique<TtsControllerImpl>(
          std::make_unique<DummyTtsPlatformImpl>());

  {
    // Calling GetMatchingVoice with no voices returns -1.
    Utterance utterance(nullptr);
    std::vector<VoiceData> voices;
    EXPECT_EQ(-1, tts_controller->GetMatchingVoice(&utterance, voices));
  }

  {
    // Calling GetMatchingVoice with any voices returns the first one
    // even if there are no criteria that match.
    Utterance utterance(nullptr);
    std::vector<VoiceData> voices;
    voices.push_back(VoiceData());
    voices.push_back(VoiceData());
    EXPECT_EQ(0, tts_controller->GetMatchingVoice(&utterance, voices));
  }

  {
    // If nothing else matches, the English voice is returned.
    // (In tests the language will always be English.)
    Utterance utterance(nullptr);
    std::vector<VoiceData> voices;
    VoiceData fr_voice;
    fr_voice.lang = "fr";
    voices.push_back(fr_voice);
    VoiceData en_voice;
    en_voice.lang = "en";
    voices.push_back(en_voice);
    VoiceData de_voice;
    de_voice.lang = "de";
    voices.push_back(de_voice);
    EXPECT_EQ(1, tts_controller->GetMatchingVoice(&utterance, voices));
  }

  {
    // Check precedence of various matching criteria. Each voice matches
    // exactly one criterion; the utterance gains criteria one at a time, in
    // increasing order of weight, so the winner moves down the list.
    std::vector<VoiceData> voices;
    VoiceData voice0;
    voices.push_back(voice0);
    VoiceData voice1;
    voice1.gender = TTS_GENDER_FEMALE;
    voices.push_back(voice1);
    VoiceData voice2;
    voice2.events.insert(TTS_EVENT_WORD);
    voices.push_back(voice2);
    VoiceData voice3;
    voice3.lang = "de-DE";
    voices.push_back(voice3);
    VoiceData voice4;
    voice4.lang = "fr-CA";
    voices.push_back(voice4);
    VoiceData voice5;
    voice5.name = "Voice5";
    voices.push_back(voice5);
    VoiceData voice6;
    voice6.extension_id = "id6";
    voices.push_back(voice6);

    Utterance utterance(nullptr);
    EXPECT_EQ(0, tts_controller->GetMatchingVoice(&utterance, voices));

    utterance.set_gender(TTS_GENDER_FEMALE);
    EXPECT_EQ(1, tts_controller->GetMatchingVoice(&utterance, voices));

    std::set<TtsEventType> types;
    types.insert(TTS_EVENT_WORD);
    utterance.set_required_event_types(types);
    EXPECT_EQ(2, tts_controller->GetMatchingVoice(&utterance, voices));

    utterance.set_lang("de-DE");
    EXPECT_EQ(3, tts_controller->GetMatchingVoice(&utterance, voices));

    // "fr-FR" only matches "fr-CA" on the base language.
    utterance.set_lang("fr-FR");
    EXPECT_EQ(4, tts_controller->GetMatchingVoice(&utterance, voices));

    utterance.set_voice_name("Voice5");
    EXPECT_EQ(5, tts_controller->GetMatchingVoice(&utterance, voices));

    utterance.set_voice_name("");
    utterance.set_extension_id("id6");
    EXPECT_EQ(6, tts_controller->GetMatchingVoice(&utterance, voices));
  }
}
} // namespace chromecast
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromecast/browser/tts/tts_platform.h"
#include <string>
// Returns a copy of the stored error text (empty when none is set).
std::string TtsPlatformImpl::error() {
  std::string current_error = error_;
  return current_error;
}
// Resets the stored error text to the empty string.
void TtsPlatformImpl::clear_error() {
  error_.clear();
}
void TtsPlatformImpl::set_error(const std::string& error) {
error_ = error;
}
// Default hook: intentionally does nothing. Subclasses may override it to
// observe which voice was chosen for an utterance.
void TtsPlatformImpl::WillSpeakUtteranceWithVoice(
    const Utterance* utterance,
    const VoiceData& voice_data) {}
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMECAST_BROWSER_TTS_TTS_PLATFORM_H_
#define CHROMECAST_BROWSER_TTS_TTS_PLATFORM_H_
#include <string>
#include "base/macros.h"
#include "chromecast/browser/tts/tts_controller.h"
// Abstract interface to the native platform's TTS engine; each platform
// provides its own subclass.
class TtsPlatformImpl {
 public:
  TtsPlatformImpl() = default;
  virtual ~TtsPlatformImpl() = default;

  // Returns true if this platform implementation is supported and available.
  virtual bool PlatformImplAvailable() = 0;

  // Speak the given utterance with the given parameters if possible,
  // and return true on success. Utterance will always be nonempty.
  // If rate, pitch, or volume are -1.0, they will be ignored.
  //
  // The TtsController will only try to speak one utterance at
  // a time. If it wants to interrupt speech, it will always call Stop
  // before speaking again.
  virtual bool Speak(int utterance_id,
                     const std::string& utterance,
                     const std::string& lang,
                     const VoiceData& voice,
                     const UtteranceContinuousParameters& params) = 0;

  // Stop speaking immediately and return true on success.
  virtual bool StopSpeaking() = 0;

  // Returns whether any speech is ongoing.
  virtual bool IsSpeaking() = 0;

  // Append information about voices provided by this platform implementation
  // to |out_voices|.
  virtual void GetVoices(std::vector<VoiceData>* out_voices) = 0;

  // Pause the current utterance, if any, until a call to Resume,
  // Speak, or StopSpeaking.
  virtual void Pause() = 0;

  // Resume speaking the current utterance, if it was paused.
  virtual void Resume() = 0;

  // Allows the platform to monitor speech commands and the voices used
  // for each one. The base implementation does nothing.
  virtual void WillSpeakUtteranceWithVoice(const Utterance* utterance,
                                           const VoiceData& voice_data);

  // Accessors for the last error text; the base implementations read and
  // write |error_|.
  virtual std::string error();
  virtual void clear_error();
  virtual void set_error(const std::string& error);

 protected:
  // Error text managed through error() / clear_error() / set_error().
  std::string error_;

  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImpl);
};
#endif // CHROMECAST_BROWSER_TTS_TTS_PLATFORM_H_
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromecast/browser/tts/tts_platform_stub.h"
#include "base/logging.h"
namespace chromecast {
// Unconditionally reports the stub platform as available.
bool TtsPlatformImplStub::PlatformImplAvailable() {
  return true;
}
// Logs the utterance text at INFO level and reports success. The remaining
// arguments (|utterance_id|, |lang|, |voice|, |params|) are not used.
bool TtsPlatformImplStub::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const VoiceData& voice,
    const UtteranceContinuousParameters& params) {
  LOG(INFO) << "Speak: " << utterance;
  return true;
}
// Logs the stop request at INFO level and always reports success.
bool TtsPlatformImplStub::StopSpeaking() {
  LOG(INFO) << "StopSpeaking";
  return true;
}
// Logs the pause request at INFO level; performs no other work.
void TtsPlatformImplStub::Pause() {
  LOG(INFO) << "Pause";
}
// Logs the resume request at INFO level; performs no other work.
void TtsPlatformImplStub::Resume() {
  LOG(INFO) << "Resume";
}
// Logs the query at INFO level and always answers false: the stub never
// reports speech in progress.
bool TtsPlatformImplStub::IsSpeaking() {
  LOG(INFO) << "IsSpeaking";
  return false;
}
// Logs the query at INFO level. |out_voices| is left untouched, so the stub
// contributes no voices.
void TtsPlatformImplStub::GetVoices(std::vector<VoiceData>* out_voices) {
  LOG(INFO) << "GetVoices";
}
} // namespace chromecast
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromecast/browser/tts/tts_controller.h"
#include "chromecast/browser/tts/tts_platform.h"
#ifndef CHROMECAST_BROWSER_TTS_TTS_PLATFORM_STUB_H_
#define CHROMECAST_BROWSER_TTS_TTS_PLATFORM_STUB_H_
namespace chromecast {
// Default stub implementation of TtsPlatformImpl for Cast. It merely logs
// TTS events.
class TtsPlatformImplStub : public TtsPlatformImpl {
 public:
  TtsPlatformImplStub() = default;
  ~TtsPlatformImplStub() override = default;

  // TtsPlatformImpl overrides: state and capability queries.
  bool PlatformImplAvailable() override;
  bool IsSpeaking() override;
  void GetVoices(std::vector<VoiceData>* out_voices) override;

  // TtsPlatformImpl overrides: speech control.
  bool Speak(int utterance_id,
             const std::string& utterance,
             const std::string& lang,
             const VoiceData& voice,
             const UtteranceContinuousParameters& params) override;
  bool StopSpeaking() override;
  void Pause() override;
  void Resume() override;

 private:
  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplStub);
};
} // namespace chromecast
#endif // CHROMECAST_BROWSER_TTS_TTS_PLATFORM_STUB_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment