Move ISpVoice instance to a worker thread

This CL is moving the speech synthesizer code to a worker thread. The TtsPlatformImpl is using an ISpVoice COM interface to perform text-to-speech actions. The instance was living on the UI Main Thread which was using STA Apartment. This CL is splitting the logic in two parts. The first part is the control logic which stay on the UI Thread. The second part is the synthesizer logic which is running on a worker thread. Bug: 869399, 1133813 Change-Id: I8f8d939950cc39637f87d101ed362123a51e985e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2462341 Commit-Queue: Etienne Bergeron <etienneb@chromium.org> Reviewed-by: François Doray <fdoray@chromium.org> Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org> Cr-Commit-Position: refs/heads/master@{#820340}

Move ISpVoice instance to a worker thread
This CL is moving the speech synthesizer code to a worker thread. The TtsPlatformImpl is using an ISpVoice COM interface to perform text-to-speech actions. The instance was living on the UI Main Thread which was using STA Apartment. This CL is splitting the logic in two parts. The first part is the control logic which stay on the UI Thread. The second part is the synthesizer logic which is running on a worker thread. Bug: 869399, 1133813 Change-Id: I8f8d939950cc39637f87d101ed362123a51e985e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2462341 Commit-Queue: Etienne Bergeron <etienneb@chromium.org> Reviewed-by: François Doray <fdoray@chromium.org> Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org> Cr-Commit-Position: refs/heads/master@{#820340}
b642822f · Etienne Bergeron · Commit Bot · 945d831a · b642822f
Commit b642822f authored Oct 23, 2020 by Etienne Bergeron Committed by Commit Bot Oct 23, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 400 additions and 141 deletions

content/browser/speech/tts_win.cc content/browser/speech/tts_win.cc +400 -141

No files found.
--- a/content/browser/speech/tts_win.cc
+++ b/content/browser/speech/tts_win.cc
@@ -7,30 +7,135 @@
 #include <sapi.h>
 #include <stdint.h>
 #include <wrl/client.h>
+#include <wrl/implements.h>
 #include <algorithm>
 #include "base/bind.h"
 #include "base/macros.h"
 #include "base/no_destructor.h"
+#include "base/sequenced_task_runner.h"
 #include "base/strings/string_number_conversions.h"
 #include "base/strings/string_piece.h"
 #include "base/strings/utf_string_conversions.h"
+#include "base/synchronization/lock.h"
+#include "base/task/task_traits.h"
+#include "base/task/thread_pool.h"
+#include "base/task_runner.h"
+#include "base/thread_annotations.h"
+#include "base/threading/sequence_bound.h"
 #include "base/values.h"
 #include "base/win/scoped_co_mem.h"
 #include "base/win/sphelper.h"
 #include "content/browser/speech/tts_platform_impl.h"
+#include "content/public/browser/browser_task_traits.h"
+#include "content/public/browser/browser_thread.h"
 #include "content/public/browser/tts_controller.h"
 namespace content {
 namespace {
+class TtsPlatformImplWin;
+class TtsPlatformImplBackgroundWorker;
 // ISpObjectToken key and value names.
 const wchar_t kAttributesKey[] = L"Attributes";
 const wchar_t kLanguageValue[] = L"Language";
-}  // anonymous namespace.
+// This COM interface is receiving the TTS events on the ISpVoice asynchronous
+// worker thread and is emitting a notification task
+// TtsPlatformImplBackgroundWorker::SendTtsEvent(...) on the worker sequence.
+class TtsEventSink
+    : public Microsoft::WRL::RuntimeClass<
+          Microsoft::WRL::RuntimeClassFlags<Microsoft::WRL::ClassicCom>,
+          ISpNotifySink> {
+ public:
+  TtsEventSink(TtsPlatformImplBackgroundWorker* worker,
+               scoped_refptr<base::TaskRunner> worker_task_runner)
+      : worker_(worker), worker_task_runner_(std::move(worker_task_runner)) {}
+  // ISpNotifySink:
+  IFACEMETHODIMP Notify(void) override;
+  int GetUtteranceId() {
+    base::AutoLock lock(lock_);
+    return utterance_id_;
+  }
+  void SetUtteranceId(int utterance_id) {
+    base::AutoLock lock(lock_);
+    utterance_id_ = utterance_id;
+  }
+ private:
+  // |worker_| is leaky and must never deleted because TtsEventSink posts
+  // asynchronous tasks to it.
+  TtsPlatformImplBackgroundWorker* worker_;
+  scoped_refptr<base::TaskRunner> worker_task_runner_;
+  base::Lock lock_;
+  int utterance_id_ GUARDED_BY(lock_);
+};
+class TtsPlatformImplBackgroundWorker {
+ public:
+  explicit TtsPlatformImplBackgroundWorker(
+      scoped_refptr<base::TaskRunner> task_runner)
+      : tts_event_sink_(
+            Microsoft::WRL::Make<TtsEventSink>(this, std::move(task_runner))) {}
+  TtsPlatformImplBackgroundWorker(const TtsPlatformImplBackgroundWorker&) =
+      delete;
+  TtsPlatformImplBackgroundWorker& operator=(
+      const TtsPlatformImplBackgroundWorker&) = delete;
+  ~TtsPlatformImplBackgroundWorker() = default;
+  void Initialize();
+  void ProcessSpeech(int utterance_id,
+                     const std::string& lang,
+                     const VoiceData& voice,
+                     const UtteranceContinuousParameters& params,
+                     base::OnceCallback<void(bool)> on_speak_finished,
+                     const std::string& parsed_utterance);
+  void StopSpeaking(bool paused);
+  void Pause();
+  void Resume();
+  void Shutdown();
+  // This function is called after being notified by the speech synthetizer that
+  // there are TTS notifications are available and should be they should be
+  // processed.
+  void OnSpeechEvent(int utterance_id);
+  // Send an TTS event notification to the TTS controller.
+  void SendTtsEvent(int utterance_id,
+                    TtsEventType event_type,
+                    int char_index,
+                    int length = -1);
+ private:
+  void GetVoices(std::vector<VoiceData>* voices);
+  void SetVoiceFromName(const std::string& name);
+  // These apply to the current utterance only that is currently being processed
+  // on the worker thread. TTS events are dispatched by TtsEventSink to this
+  // class and update the current speaking state of the utterance.
+  std::string last_voice_name_;
+  ULONG stream_number_ = 0u;
+  int utterance_id_ = -1;
+  size_t utterance_char_position_ = 0u;
+  size_t utterance_prefix_length_ = 0u;
+  size_t utterance_length_ = 0u;
+  // The COM class ISpVoice lives within the COM MTA apartment (worker pool).
+  // This interface can not be called on the UI thread since UI thread is
+  // COM STA.
+  Microsoft::WRL::ComPtr<ISpVoice> speech_synthesizer_;
+  Microsoft::WRL::ComPtr<TtsEventSink> tts_event_sink_;
+};
 class TtsPlatformImplWin : public TtsPlatformImpl {
 public:
@@ -38,7 +143,7 @@ class TtsPlatformImplWin : public TtsPlatformImpl {
  TtsPlatformImplWin& operator=(const TtsPlatformImplWin&) = delete;
  bool PlatformImplSupported() override { return true; }
-  bool PlatformImplInitialized() override { return true; }
+  bool PlatformImplInitialized() override;
  void Speak(int utterance_id,
             const std::string& utterance,
@@ -59,19 +164,18 @@ class TtsPlatformImplWin : public TtsPlatformImpl {
  void Shutdown() override;
+  void OnInitializeComplete(bool success, std::vector<VoiceData> voices);
+  void OnSpeakScheduled(base::OnceCallback<void(bool)> on_speak_finished,
+                        bool success);
+  void OnSpeakFinished(int utterance_id);
  // Get the single instance of this class.
  static TtsPlatformImplWin* GetInstance();
-  static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
 private:
  friend base::NoDestructor<TtsPlatformImplWin>;
  TtsPlatformImplWin();
-  void OnSpeechEvent();
-  void SetVoiceFromName(const std::string& name);
  void ProcessSpeech(int utterance_id,
                     const std::string& lang,
                     const VoiceData& voice,
@@ -79,52 +183,63 @@ class TtsPlatformImplWin : public TtsPlatformImpl {
                     base::OnceCallback<void(bool)> on_speak_finished,
                     const std::string& parsed_utterance);
-  Microsoft::WRL::ComPtr<ISpVoice> speech_synthesizer_;
+  // These variables hold the platform state.
-  // These apply to the current utterance only.
-  std::wstring utterance_;
-  int utterance_id_ = 0;
-  int prefix_len_ = 0;
-  ULONG stream_number_ = 0u;
-  int char_position_ = 0;
-  int char_length_ = 0;
  bool paused_ = false;
-  std::string last_voice_name_;
+  bool is_speaking_ = false;
+  int utterance_id_ = -1;
+  bool platform_initialized_ = false;
+  std::vector<VoiceData> voices_;
+  // Hold the state and the code of the background implementation.
+  scoped_refptr<base::SequencedTaskRunner> worker_task_runner_;
+  base::SequenceBound<TtsPlatformImplBackgroundWorker> worker_;
 };
-// static
+HRESULT TtsEventSink::Notify() {
-TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
+  worker_task_runner_->PostTask(
-  return TtsPlatformImplWin::GetInstance();
+      FROM_HERE, base::BindOnce(&TtsPlatformImplBackgroundWorker::OnSpeechEvent,
+                                base::Unretained(worker_), GetUtteranceId()));
+  return S_OK;
 }
-void TtsPlatformImplWin::Speak(
+//
-    int utterance_id,
+// TtsPlatformImplBackgroundWorker
-    const std::string& utterance,
+//
-    const std::string& lang,
-    const VoiceData& voice,
+void TtsPlatformImplBackgroundWorker::Initialize() {
-    const UtteranceContinuousParameters& params,
+  bool success = false;
-    base::OnceCallback<void(bool)> on_speak_finished) {
+  std::vector<VoiceData> voices;
-  // Parse SSML and process speech.
-  TtsController::GetInstance()->StripSSML(
+  ::CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL,
-      utterance, base::BindOnce(&TtsPlatformImplWin::ProcessSpeech,
+                     IID_PPV_ARGS(&speech_synthesizer_));
-                                base::Unretained(this), utterance_id, lang,
+  if (speech_synthesizer_.Get()) {
-                                voice, params, std::move(on_speak_finished)));
+    ULONGLONG event_mask =
+        SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_TTS_BOOKMARK) |
+        SPFEI(SPEI_WORD_BOUNDARY) | SPFEI(SPEI_SENTENCE_BOUNDARY) |
+        SPFEI(SPEI_END_INPUT_STREAM);
+    speech_synthesizer_->SetInterest(event_mask, event_mask);
+    speech_synthesizer_->SetNotifySink(tts_event_sink_.Get());
+    GetVoices(&voices);
+    success = true;
+  }
+  GetUIThreadTaskRunner({})->PostTask(
+      FROM_HERE,
+      base::BindOnce(&TtsPlatformImplWin::OnInitializeComplete,
+                     base::Unretained(TtsPlatformImplWin::GetInstance()),
+                     success, std::move(voices)));
 }
-void TtsPlatformImplWin::ProcessSpeech(
+void TtsPlatformImplBackgroundWorker::ProcessSpeech(
    int utterance_id,
    const std::string& lang,
    const VoiceData& voice,
    const UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished,
    const std::string& parsed_utterance) {
-  std::wstring prefix;
+  DCHECK(speech_synthesizer_.Get());
-  std::wstring suffix;
-  if (!speech_synthesizer_.Get()) {
-    std::move(on_speak_finished).Run(false);
-    return;
-  }
  SetVoiceFromName(voice.name);
@@ -137,6 +252,8 @@ void TtsPlatformImplWin::ProcessSpeech(
    speech_synthesizer_->SetRate(static_cast<int32_t>(10 * log10(params.rate)));
  }
+  std::wstring prefix;
+  std::wstring suffix;
  if (params.pitch >= 0.0) {
    // The TTS api allows a range of -10 to 10 for speech pitch:
    // https://docs.microsoft.com/en-us/previous-versions/windows/desktop/ms720500(v%3Dvs.85)
@@ -158,70 +275,119 @@ void TtsPlatformImplWin::ProcessSpeech(
  // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
-  utterance_ = base::UTF8ToWide(parsed_utterance);
+  std::wstring utterance = base::UTF8ToWide(parsed_utterance);
+  std::wstring merged_utterance = prefix + utterance + suffix;
  utterance_id_ = utterance_id;
-  char_position_ = 0;
+  utterance_char_position_ = 0;
-  char_length_ = 0;
+  utterance_length_ = utterance.size();
-  std::wstring merged_utterance = prefix + utterance_ + suffix;
+  utterance_prefix_length_ = prefix.size();
-  prefix_len_ = prefix.size();
+  tts_event_sink_->SetUtteranceId(utterance_id);
  HRESULT result = speech_synthesizer_->Speak(merged_utterance.c_str(),
                                              SPF_ASYNC, &stream_number_);
-  std::move(on_speak_finished).Run((result == S_OK));
+  bool success = (result == S_OK);
+  GetUIThreadTaskRunner({})->PostTask(
+      FROM_HERE, base::BindOnce(std::move(on_speak_finished), success));
 }
-bool TtsPlatformImplWin::StopSpeaking() {
+void TtsPlatformImplBackgroundWorker::StopSpeaking(bool paused) {
  if (speech_synthesizer_.Get()) {
-    // Clear the stream number so that any further events relating to this
+    // Block notifications from the current utterance.
-    // utterance are ignored.
+    tts_event_sink_->SetUtteranceId(-1);
-    stream_number_ = 0;
+    utterance_id_ = -1;
-    if (IsSpeaking()) {
+    // Stop speech by speaking nullptr with the purge flag.
-      // Stop speech by speaking nullptr with the purge flag.
+    speech_synthesizer_->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr);
-      speech_synthesizer_->Speak(nullptr, SPF_ASYNC | SPF_PURGEBEFORESPEAK,
-                                 NULL);
+    // Ensures the synthesizer is not paused after a stop.
-    }
+    if (paused)
-    if (paused_) {
      speech_synthesizer_->Resume();
-      paused_ = false;
-    }
  }
-  return true;
 }
-void TtsPlatformImplWin::Pause() {
+void TtsPlatformImplBackgroundWorker::Pause() {
-  if (speech_synthesizer_.Get() && utterance_id_ && !paused_) {
+  if (speech_synthesizer_.Get()) {
    speech_synthesizer_->Pause();
-    paused_ = true;
+    SendTtsEvent(utterance_id_, TTS_EVENT_PAUSE, utterance_char_position_);
-    TtsController::GetInstance()->OnTtsEvent(utterance_id_, TTS_EVENT_PAUSE,
-                                             char_position_, -1, "");
  }
 }
-void TtsPlatformImplWin::Resume() {
+void TtsPlatformImplBackgroundWorker::Resume() {
-  if (speech_synthesizer_.Get() && utterance_id_ && paused_) {
+  if (speech_synthesizer_.Get()) {
    speech_synthesizer_->Resume();
-    paused_ = false;
+    SendTtsEvent(utterance_id_, TTS_EVENT_RESUME, utterance_char_position_);
-    TtsController::GetInstance()->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME,
-                                             char_position_, -1, "");
  }
 }
-bool TtsPlatformImplWin::IsSpeaking() {
+void TtsPlatformImplBackgroundWorker::Shutdown() {
-  if (speech_synthesizer_.Get()) {
+  speech_synthesizer_->SetNotifySink(nullptr);
-    SPVOICESTATUS status;
+  tts_event_sink_ = nullptr;
-    HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
+  speech_synthesizer_ = nullptr;
-    if (result == S_OK) {
+}
-      if (status.dwRunningState == 0 ||  // 0 == waiting to speak
-          status.dwRunningState == SPRS_IS_SPEAKING) {
+void TtsPlatformImplBackgroundWorker::OnSpeechEvent(int utterance_id) {
-        return true;
+  if (!speech_synthesizer_.Get())
-      }
+    return;
+  SPEVENT event;
+  while (S_OK == speech_synthesizer_->GetEvents(1, &event, nullptr)) {
+    // Ignore notifications that are not related to the current utterance.
+    if (event.ulStreamNum != stream_number_ || utterance_id_ == -1 ||
+        utterance_id != utterance_id_) {
+      continue;
+    }
+    switch (event.eEventId) {
+      case SPEI_START_INPUT_STREAM:
+        utterance_char_position_ = 0;
+        SendTtsEvent(utterance_id_, TTS_EVENT_START, utterance_char_position_);
+        break;
+      case SPEI_END_INPUT_STREAM:
+        GetUIThreadTaskRunner({})->PostTask(
+            FROM_HERE,
+            base::BindOnce(&TtsPlatformImplWin::OnSpeakFinished,
+                           base::Unretained(TtsPlatformImplWin::GetInstance()),
+                           utterance_id_));
+        utterance_char_position_ = utterance_length_;
+        SendTtsEvent(utterance_id_, TTS_EVENT_END, utterance_char_position_);
+        break;
+      case SPEI_TTS_BOOKMARK:
+        SendTtsEvent(utterance_id_, TTS_EVENT_MARKER, utterance_char_position_);
+        break;
+      case SPEI_WORD_BOUNDARY:
+        SendTtsEvent(utterance_id_, TTS_EVENT_WORD, utterance_char_position_,
+                     static_cast<ULONG>(event.wParam));
+        break;
+      case SPEI_SENTENCE_BOUNDARY:
+        SendTtsEvent(utterance_id_, TTS_EVENT_SENTENCE,
+                     utterance_char_position_);
+        break;
+      default:
+        break;
    }
  }
-  return false;
 }
-void TtsPlatformImplWin::GetVoices(std::vector<VoiceData>* out_voices) {
+void TtsPlatformImplBackgroundWorker::SendTtsEvent(int utterance_id,
+                                                   TtsEventType event_type,
+                                                   int char_index,
+                                                   int length) {
+  GetUIThreadTaskRunner({})->PostTask(
+      FROM_HERE, base::BindOnce(&TtsController::OnTtsEvent,
+                                base::Unretained(TtsController::GetInstance()),
+                                utterance_id, event_type, char_index, length,
+                                std::string()));
+}
+void TtsPlatformImplBackgroundWorker::GetVoices(
+    std::vector<VoiceData>* out_voices) {
+  if (!speech_synthesizer_.Get())
+    return;
  Microsoft::WRL::ComPtr<IEnumSpObjectTokens> voice_tokens;
  unsigned long voice_count;
  if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, &voice_tokens))
@@ -267,54 +433,11 @@ void TtsPlatformImplWin::GetVoices(std::vector<VoiceData>* out_voices) {
  }
 }
-void TtsPlatformImplWin::OnSpeechEvent() {
+void TtsPlatformImplBackgroundWorker::SetVoiceFromName(
-  if (!speech_synthesizer_.Get())
+    const std::string& name) {
-    return;
-  TtsController* controller = TtsController::GetInstance();
-  SPEVENT event;
-  while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
-    if (event.ulStreamNum != stream_number_)
-      continue;
-    switch (event.eEventId) {
-      case SPEI_START_INPUT_STREAM:
-        controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, -1,
-                               std::string());
-        break;
-      case SPEI_END_INPUT_STREAM:
-        char_position_ = utterance_.size();
-        controller->OnTtsEvent(utterance_id_, TTS_EVENT_END, char_position_, 0,
-                               std::string());
-        break;
-      case SPEI_TTS_BOOKMARK:
-        controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, char_position_,
-                               -1, std::string());
-        break;
-      case SPEI_WORD_BOUNDARY:
-        char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
-        char_length_ = static_cast<ULONG>(event.wParam);
-        controller->OnTtsEvent(utterance_id_, TTS_EVENT_WORD, char_position_,
-                               char_length_, std::string());
-        break;
-      case SPEI_SENTENCE_BOUNDARY:
-        char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
-        controller->OnTtsEvent(utterance_id_, TTS_EVENT_SENTENCE,
-                               char_position_, -1, std::string());
-        break;
-      default:
-        break;
-    }
-  }
-}
-void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) {
  if (name.empty() || name == last_voice_name_)
    return;
-  if (!speech_synthesizer_.Get())
-    return;
  last_voice_name_ = name;
  Microsoft::WRL::ComPtr<IEnumSpObjectTokens> voice_tokens;
@@ -339,24 +462,158 @@ void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) {
  }
 }
+//
+// TtsPlatformImplWin
+//
+bool TtsPlatformImplWin::PlatformImplInitialized() {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  return platform_initialized_;
+}
+void TtsPlatformImplWin::Speak(
+    int utterance_id,
+    const std::string& utterance,
+    const std::string& lang,
+    const VoiceData& voice,
+    const UtteranceContinuousParameters& params,
+    base::OnceCallback<void(bool)> on_speak_finished) {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  DCHECK(platform_initialized_);
+  // Do not emit utterance if the platform is not ready.
+  if (paused_ || is_speaking_) {
+    std::move(on_speak_finished).Run(false);
+    return;
+  }
+  // Flag that a utterance is getting emitted. The |is_speaking_| flag will be
+  // set back to false when the utterance will be fully spoken, stopped or if
+  // the voice synthetizer was not able to emit it.
+  is_speaking_ = true;
+  utterance_id_ = utterance_id;
+  // Parse SSML and process speech.
+  TtsController::GetInstance()->StripSSML(
+      utterance,
+      base::BindOnce(&TtsPlatformImplWin::ProcessSpeech, base::Unretained(this),
+                     utterance_id, lang, voice, params,
+                     base::BindOnce(&TtsPlatformImplWin::OnSpeakScheduled,
+                                    base::Unretained(this),
+                                    std::move(on_speak_finished))));
+}
+bool TtsPlatformImplWin::StopSpeaking() {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::StopSpeaking,
+               paused_);
+  paused_ = false;
+  is_speaking_ = false;
+  utterance_id_ = -1;
+  return true;
+}
+void TtsPlatformImplWin::Pause() {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  DCHECK(platform_initialized_);
+  if (paused_ || !is_speaking_)
+    return;
+  worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Pause);
+  paused_ = true;
+}
+void TtsPlatformImplWin::Resume() {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  DCHECK(platform_initialized_);
+  if (!paused_ || !is_speaking_)
+    return;
+  worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Resume);
+  paused_ = false;
+}
+bool TtsPlatformImplWin::IsSpeaking() {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  DCHECK(platform_initialized_);
+  return is_speaking_ && !paused_;
+}
+void TtsPlatformImplWin::GetVoices(std::vector<VoiceData>* out_voices) {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  DCHECK(platform_initialized_);
+  out_voices->insert(out_voices->end(), voices_.begin(), voices_.end());
+}
 void TtsPlatformImplWin::Shutdown() {
  // This is required to ensures the object is released before the COM is
  // uninitialized. Otherwise, this is causing shutdown hangs.
-  speech_synthesizer_ = nullptr;
+  worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Shutdown);
 }
-TtsPlatformImplWin::TtsPlatformImplWin() {
+void TtsPlatformImplWin::OnInitializeComplete(bool success,
-  ::CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL,
+                                              std::vector<VoiceData> voices) {
-                     IID_PPV_ARGS(&speech_synthesizer_));
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
-  if (speech_synthesizer_.Get()) {
-    ULONGLONG event_mask =
+  if (success)
-        SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_TTS_BOOKMARK) |
+    voices_ = std::move(voices);
-        SPFEI(SPEI_WORD_BOUNDARY) | SPFEI(SPEI_SENTENCE_BOUNDARY) |
-        SPFEI(SPEI_END_INPUT_STREAM);
+  platform_initialized_ = true;
-    speech_synthesizer_->SetInterest(event_mask, event_mask);
+  TtsController::GetInstance()->VoicesChanged();
-    speech_synthesizer_->SetNotifyCallbackFunction(
+}
-        TtsPlatformImplWin::SpeechEventCallback, 0, 0);
+void TtsPlatformImplWin::OnSpeakScheduled(
+    base::OnceCallback<void(bool)> on_speak_finished,
+    bool success) {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  DCHECK(is_speaking_);
+  // If the utterance was not able to be emitted, stop the speaking. There
+  // won't be any asynchronous TTS event to confirm the end of the speech.
+  if (!success) {
+    is_speaking_ = false;
+    utterance_id_ = -1;
  }
+  // Pass the results to our caller.
+  std::move(on_speak_finished).Run(success);
+}
+void TtsPlatformImplWin::OnSpeakFinished(int utterance_id) {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  if (utterance_id != utterance_id_)
+    return;
+  DCHECK(is_speaking_);
+  DCHECK_NE(utterance_id_, -1);
+  is_speaking_ = false;
+  utterance_id_ = -1;
+}
+void TtsPlatformImplWin::ProcessSpeech(
+    int utterance_id,
+    const std::string& lang,
+    const VoiceData& voice,
+    const UtteranceContinuousParameters& params,
+    base::OnceCallback<void(bool)> on_speak_finished,
+    const std::string& parsed_utterance) {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::ProcessSpeech,
+               utterance_id, lang, voice, params, std::move(on_speak_finished),
+               parsed_utterance);
+}
+TtsPlatformImplWin::TtsPlatformImplWin()
+    : worker_task_runner_(
+          base::ThreadPool::CreateSequencedTaskRunner({base::MayBlock()})),
+      worker_(worker_task_runner_, worker_task_runner_) {
+  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
+  worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Initialize);
 }
 // static
@@ -365,9 +622,11 @@ TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
  return tts_platform.get();
 }
+}  // namespace
 // static
-void TtsPlatformImplWin::SpeechEventCallback(WPARAM w_param, LPARAM l_param) {
+TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
-  GetInstance()->OnSpeechEvent();
+  return TtsPlatformImplWin::GetInstance();
 }
 }  // namespace content