[Live Caption] Pass a message to the browser when the speech

recognition service is ready.

As soon as a user presses play on a video or audio, the caption bubble should appear with the text "Detecting speech...". To do this, this CL introduces an OnSpeechRecognitionReady function to the CaptionHost mojo interface, which is called the first time the ChromeSpeechRecognitionClient receives audio to be sent to the speech service. The ChromeSpeechRecognitionClient is constructed when the media is first constructed, which is why this is called when audio is received rather than when the recognizer is bound (which is when the on_ready_callback is called back to the renderer).

The flow of information from the speech recognition service to the UI is as follows:

ChromeSpeechRecognitionClient::SendAudioToSpeechRecognitionService()
-----chrome::mojom::CaptionHost OnSpeechRecognitionReady-----
CaptionHostImpl::OnSpeechRecognitionReady()
CaptionController::OnSpeechRecognitionReady(content::WebContents* web_contents)
CaptionBubbleControllerViews::OnSpeechRecognitionReady()
CaptionBubbleModel::OnReady()

Bug: 1055150
Change-Id: Ib5eff4034b236b1ee1c6747628b11f9f21506460
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2321406
Reviewed-by: Avi Drissman <avi@chromium.org>
Reviewed-by: Alex Gough <ajgo@chromium.org>
Reviewed-by: Katie Dektar <katie@chromium.org>
Commit-Queue: Abigail Klein <abigailbklein@google.com>
Cr-Commit-Position: refs/heads/master@{#793684}

[Live Caption] Pass a message to the browser when the speech
recognition service is ready.

As soon as a user presses play on a video or audio, the caption bubble should appear with the text "Detecting speech...". To do this, this CL introduces an OnSpeechRecognitionReady function to the CaptionHost mojo interface, which is called the first time the ChromeSpeechRecognitionClient receives audio to be sent to the speech service. The ChromeSpeechRecognitionClient is constructed when the media is first constructed, which is why this is called when audio is received rather than when the recognizer is bound (which is when the on_ready_callback is called back to the renderer).

The flow of information from the speech recognition service to the UI is as follows:

ChromeSpeechRecognitionClient::SendAudioToSpeechRecognitionService()
-----chrome::mojom::CaptionHost OnSpeechRecognitionReady-----
CaptionHostImpl::OnSpeechRecognitionReady()
CaptionController::OnSpeechRecognitionReady(content::WebContents* web_contents)
CaptionBubbleControllerViews::OnSpeechRecognitionReady()
CaptionBubbleModel::OnReady()

Bug: 1055150
Change-Id: Ib5eff4034b236b1ee1c6747628b11f9f21506460
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2321406
Reviewed-by: Avi Drissman <avi@chromium.org>
Reviewed-by: Alex Gough <ajgo@chromium.org>
Reviewed-by: Katie Dektar <katie@chromium.org>
Commit-Queue: Abigail Klein <abigailbklein@google.com>
Cr-Commit-Position: refs/heads/master@{#793684}
3c53d30e · Abigail Klein · Commit Bot · df80f840 · 3c53d30e · 3c53d30e
Commit 3c53d30e authored Jul 31, 2020 by Abigail Klein Committed by Commit Bot Jul 31, 2020
17 changed files
--- a/chrome/browser/accessibility/caption_controller.cc
+++ b/chrome/browser/accessibility/caption_controller.cc
@@ -161,6 +161,15 @@ void CaptionController::OnBrowserRemoved(Browser* browser) {
  caption_bubble_controllers_.erase(browser);
 }
+// Forwards the "speech recognition ready" notification for |web_contents| to
+// the CaptionBubbleController registered for the browser that contains it.
+// Returns false when no browser is found for |web_contents| or when that
+// browser has no registered controller; otherwise returns the controller's
+// own result.
+bool CaptionController::OnSpeechRecognitionReady(
+    content::WebContents* web_contents) {
+  Browser* browser = chrome::FindBrowserWithWebContents(web_contents);
+  if (!browser)
+    return false;
+  // A single find() replaces the previous count() + operator[] pair, so the
+  // map is searched only once.
+  auto it = caption_bubble_controllers_.find(browser);
+  if (it == caption_bubble_controllers_.end())
+    return false;
+  return it->second->OnSpeechRecognitionReady(web_contents);
+}
 bool CaptionController::DispatchTranscription(
    content::WebContents* web_contents,
    const chrome::mojom::TranscriptionResultPtr& transcription_result) {

--- a/chrome/browser/accessibility/caption_controller.h
+++ b/chrome/browser/accessibility/caption_controller.h
@@ -64,6 +64,12 @@ class CaptionController : public BrowserListObserver, public KeyedService {
  void Init();
+  // Alerts the CaptionBubbleController that belongs to the appropriate browser
+  // that speech recognition is ready to start for the given web contents.
+  // Returns whether this message was routed successfully. Transcriptions will
+  // not proceed if this returns false.
+  bool OnSpeechRecognitionReady(content::WebContents* web_contents);
  // Routes a transcription to the CaptionBubbleController that belongs to the
  // appropriate browser. Returns whether the transcription result was routed
  // successfully. Transcriptions will halt if this returns false.

--- a/chrome/browser/accessibility/caption_controller_browsertest.cc
+++ b/chrome/browser/accessibility/caption_controller_browsertest.cc
--- a/chrome/browser/accessibility/caption_host_impl.cc
+++ b/chrome/browser/accessibility/caption_host_impl.cc
@@ -39,6 +39,29 @@ CaptionHostImpl::CaptionHostImpl(content::RenderFrameHost* frame_host)
 CaptionHostImpl::~CaptionHostImpl() = default;
+// chrome::mojom::CaptionHost implementation. Forwards the renderer's "speech
+// recognition ready" notification to the CaptionController of the profile that
+// owns this frame, and runs |reply| with whether the message was routed.
+// Runs |reply| with false if the frame host, its WebContents, the profile, or
+// the CaptionController is unavailable.
+void CaptionHostImpl::OnSpeechRecognitionReady(
+    OnSpeechRecognitionReadyCallback reply) {
+  if (!frame_host_) {
+    std::move(reply).Run(false);
+    return;
+  }
+  content::WebContents* web_contents =
+      content::WebContents::FromRenderFrameHost(frame_host_);
+  if (!web_contents) {
+    // Forget the frame host so later calls take the early return above.
+    frame_host_ = nullptr;
+    std::move(reply).Run(false);
+    return;
+  }
+  Profile* profile =
+      Profile::FromBrowserContext(web_contents->GetBrowserContext());
+  if (!profile) {
+    std::move(reply).Run(false);
+    return;
+  }
+  // NOTE(review): presumably the factory can return null for profiles that
+  // have no CaptionController; guard instead of dereferencing unconditionally,
+  // matching the null handling of every other pointer in this function.
+  auto* caption_controller = CaptionControllerFactory::GetForProfile(profile);
+  if (!caption_controller) {
+    std::move(reply).Run(false);
+    return;
+  }
+  std::move(reply).Run(
+      caption_controller->OnSpeechRecognitionReady(web_contents));
+}
 void CaptionHostImpl::OnTranscription(
    chrome::mojom::TranscriptionResultPtr transcription_result,
    OnTranscriptionCallback reply) {

--- a/chrome/browser/accessibility/caption_host_impl.h
+++ b/chrome/browser/accessibility/caption_host_impl.h
@@ -37,6 +37,8 @@ class CaptionHostImpl : public chrome::mojom::CaptionHost,
      mojo::PendingReceiver<chrome::mojom::CaptionHost> receiver);
  // chrome::mojom::CaptionHost:
+  void OnSpeechRecognitionReady(
+      OnSpeechRecognitionReadyCallback reply) override;
  void OnTranscription(
      chrome::mojom::TranscriptionResultPtr transcription_result,
      OnTranscriptionCallback reply) override;

--- a/chrome/browser/ui/caption_bubble_controller.h
+++ b/chrome/browser/ui/caption_bubble_controller.h
@@ -37,6 +37,8 @@ class CaptionBubbleController {
  static std::unique_ptr<CaptionBubbleController> Create(Browser* browser);
+  virtual bool OnSpeechRecognitionReady(content::WebContents* web_contents) = 0;
  // Called when a transcription is received from the service. Returns whether
  // the transcription result was set on the caption bubble successfully.
  // Transcriptions will halt if this returns false.

--- a/chrome/browser/ui/views/accessibility/caption_bubble.cc
+++ b/chrome/browser/ui/views/accessibility/caption_bubble.cc
@@ -539,6 +539,16 @@ void CaptionBubble::OnErrorChanged() {
  Redraw();
 }
+// Reacts to the model's ready state changing: seeds the label with placeholder
+// text and refreshes the bubble / wait-text visibility.
+void CaptionBubble::OnReadyChanged() {
+  // Workaround for a RenderText bug: if the label is empty when it is first
+  // displayed, later line-count calculations
+  // (CaptionBubble::GetNumLinesInLabel) come out wrong. A single space keeps
+  // the label non-empty.
+  // TODO(1055150): Fix the bug in RenderText and then remove this workaround.
+  const auto placeholder_text = base::ASCIIToUTF16("\u0020");
+  label_->SetText(placeholder_text);
+  UpdateBubbleAndWaitTextVisibility();
+}
 void CaptionBubble::OnIsExpandedChanged() {
  expand_button_->SetVisible(!is_expanded_);
  collapse_button_->SetVisible(is_expanded_);
@@ -566,14 +576,14 @@ void CaptionBubble::UpdateBubbleVisibility() {
    // Hide the widget if there is no room for it or the model is closed.
    if (GetWidget()->IsVisible())
      GetWidget()->Hide();
-  } else if (label_->GetText().size() > 0 || model_->HasError()) {
+  } else if (model_->IsReady() || model_->HasError()) {
-    // Show the widget if it has text or an error to display. Only show the
+    // Show the widget if it is ready to receive transcriptions or it has an
-    // widget if it isn't already visible. Always calling Widget::Show() will
+    // error to display. Only show the widget if it isn't already visible.
-    // mean the widget gets focus each time.
+    // Always calling Widget::Show() will mean the widget gets focus each time.
    if (!GetWidget()->IsVisible())
      GetWidget()->Show();
  } else if (GetWidget()->IsVisible()) {
-    // No text and no error. Hide it.
+    // Not ready and no error. Hide it.
    GetWidget()->Hide();
  }
 }

--- a/chrome/browser/ui/views/accessibility/caption_bubble.h
+++ b/chrome/browser/ui/views/accessibility/caption_bubble.h
@@ -103,6 +103,11 @@ class CaptionBubble : public views::BubbleDialogDelegateView,
  // the model has an error, otherwise displays the latest text.
  void OnErrorChanged();
+  // Called by the CaptionBubbleModel to notify this object that the model's
+  // on ready state has changed. Makes the caption bubble become visible and
+  // show the wait text.
+  void OnReadyChanged();
  // Called when the caption bubble expanded state has changed. Changes the
  // number of lines displayed.
  void OnIsExpandedChanged();

--- a/chrome/browser/ui/views/accessibility/caption_bubble_controller_views.cc
+++ b/chrome/browser/ui/views/accessibility/caption_bubble_controller_views.cc
@@ -69,6 +69,18 @@ void CaptionBubbleControllerViews::OnCaptionBubbleDestroyed() {
  browser_ = nullptr;
 }
+// Marks the caption bubble model for |web_contents| as ready. Returns false,
+// without touching any model, when there is no caption bubble, when no model
+// is registered for |web_contents|, or when that model has been closed;
+// returns true after calling OnReady() on the model otherwise.
+bool CaptionBubbleControllerViews::OnSpeechRecognitionReady(
+    content::WebContents* web_contents) {
+  if (!caption_bubble_)
+    return false;
+  // One find() replaces the previous count() plus two operator[] lookups of
+  // the same key.
+  auto it = caption_bubble_models_.find(web_contents);
+  if (it == caption_bubble_models_.end() || it->second->IsClosed())
+    return false;
+  it->second->OnReady();
+  return true;
+}
 bool CaptionBubbleControllerViews::OnTranscription(
    const chrome::mojom::TranscriptionResultPtr& transcription_result,
    content::WebContents* web_contents) {

--- a/chrome/browser/ui/views/accessibility/caption_bubble_controller_views.h
+++ b/chrome/browser/ui/views/accessibility/caption_bubble_controller_views.h
@@ -38,6 +38,8 @@ class CaptionBubbleControllerViews : public CaptionBubbleController,
  CaptionBubbleControllerViews& operator=(const CaptionBubbleControllerViews&) =
      delete;
+  bool OnSpeechRecognitionReady(content::WebContents* web_contents) override;
  // Called when a transcription is received from the service. Returns whether
  // the transcription result was set on the caption bubble successfully.
  // Transcriptions will halt if this returns false.

--- a/chrome/browser/ui/views/accessibility/caption_bubble_controller_views_browsertest.cc
+++ b/chrome/browser/ui/views/accessibility/caption_bubble_controller_views_browsertest.cc
--- a/chrome/browser/ui/views/accessibility/caption_bubble_model.cc
+++ b/chrome/browser/ui/views/accessibility/caption_bubble_model.cc
@@ -29,6 +29,7 @@ void CaptionBubbleModel::SetObserver(CaptionBubble* observer) {
    return;
  observer_ = observer;
  if (observer_) {
+    observer_->OnReadyChanged();
    observer_->OnTextChanged();
    observer_->OnErrorChanged();
  }
@@ -52,9 +53,23 @@ void CaptionBubbleModel::Close() {
  final_text_.clear();
  partial_text_.clear();
  is_closed_ = true;
+  is_ready_ = false;
  OnTextChanged();
 }
+// Puts the model into the ready state: sets the ready flag, discards any
+// final and partial text, and notifies the observer if one is set.
+void CaptionBubbleModel::OnReady() {
+  is_ready_ = true;
+  partial_text_.clear();
+  final_text_.clear();
+  if (!observer_)
+    return;
+  // The observer is told through OnReadyChanged() rather than OnTextChanged()
+  // because the label text must not be empty when it is displayed, and the
+  // CaptionBubble handles that in its OnReadyChanged().
+  // TODO(1055150): Fix the bug in RenderText and then change this to
+  // OnTextChanged().
+  observer_->OnReadyChanged();
+}
 void CaptionBubbleModel::SetHasError(bool has_error) {
  has_error_ = has_error;
  if (observer_)
@@ -70,6 +85,7 @@ void CaptionBubbleModel::DidFinishNavigation(
  final_text_.clear();
  partial_text_.clear();
  is_closed_ = false;
+  is_ready_ = false;
  has_error_ = false;
  OnTextChanged();
 }

--- a/chrome/browser/ui/views/accessibility/caption_bubble_model.h
+++ b/chrome/browser/ui/views/accessibility/caption_bubble_model.h
@@ -57,7 +57,10 @@ class CaptionBubbleModel : public content::WebContentsObserver {
  // observer.
  void Close();
+  void OnReady();
  bool IsClosed() const { return is_closed_; }
+  bool IsReady() const { return is_ready_; }
  bool HasError() const { return has_error_; }
  std::string GetFullText() const { return final_text_ + partial_text_; }
@@ -76,6 +79,9 @@ class CaptionBubbleModel : public content::WebContentsObserver {
  // Whether the bubble has been closed by the user.
  bool is_closed_ = false;
+  // Whether the bubble is ready to receive transcriptions.
+  bool is_ready_ = false;
  // Whether an error should be displayed on the bubble.
  bool has_error_ = false;

--- a/chrome/browser/ui/views/frame/browser_view_browsertest.cc
+++ b/chrome/browser/ui/views/frame/browser_view_browsertest.cc
@@ -397,9 +397,11 @@ IN_PROC_BROWSER_TEST_F(BrowserViewTest, F6CyclesThroughCaptionBubbleToo) {
          caption_controller->GetCaptionBubbleControllerForBrowser(browser()));
  EXPECT_FALSE(bubble_controller->GetFocusableCaptionBubble());
+  content::WebContents* contents =
+      browser()->tab_strip_model()->GetActiveWebContents();
+  caption_controller->OnSpeechRecognitionReady(contents);
  caption_controller->DispatchTranscription(
-      browser()->tab_strip_model()->GetActiveWebContents(),
+      contents, chrome::mojom::TranscriptionResult::New("Hello, world", false));
-      chrome::mojom::TranscriptionResult::New("Hello, world", false));
  // Now the caption bubble exists but is not focused.
  views::View* bubble = bubble_controller->GetFocusableCaptionBubble();
  EXPECT_TRUE(bubble);

--- a/chrome/common/caption.mojom
+++ b/chrome/common/caption.mojom
@@ -7,6 +7,9 @@ module chrome.mojom;
 // Browser process interface exposed to the renderer for communication about
 // the Live Caption feature.
 interface CaptionHost {
+  // Called when speech recognition service is ready to send transcriptions.
+  OnSpeechRecognitionReady() => (bool success);
  // Called when the speech recognition client receives a transcription from the
  // speech service. Returns whether the transcription result was received
  // successfully. Transcriptions will halt if this returns false.

--- a/chrome/renderer/media/chrome_speech_recognition_client.cc
+++ b/chrome/renderer/media/chrome_speech_recognition_client.cc
@@ -5,6 +5,7 @@
 #include "chrome/renderer/media/chrome_speech_recognition_client.h"
 #include <utility>
+#include <vector>
 #include "base/metrics/field_trial_params.h"
 #include "content/public/renderer/render_frame.h"
@@ -57,7 +58,6 @@ void ChromeSpeechRecognitionClient::OnRecognizerBound(
    bool is_multichannel_supported) {
  is_multichannel_supported_ = is_multichannel_supported;
  is_recognizer_bound_ = true;
  if (on_ready_callback_)
    std::move(on_ready_callback_).Run();
 }
@@ -102,11 +102,11 @@ void ChromeSpeechRecognitionClient::OnSpeechRecognitionRecognitionEvent(
  caption_host_->OnTranscription(
      chrome::mojom::TranscriptionResult::New(result->transcription,
                                              result->is_final),
-      base::BindOnce(&ChromeSpeechRecognitionClient::OnTranscriptionCallback,
+      base::BindOnce(&ChromeSpeechRecognitionClient::OnBrowserCallback,
                     base::Unretained(this)));
 }
-void ChromeSpeechRecognitionClient::OnTranscriptionCallback(bool success) {
+void ChromeSpeechRecognitionClient::OnBrowserCallback(bool success) {
  is_browser_requesting_transcription_ = success;
 }
@@ -145,6 +145,17 @@ void ChromeSpeechRecognitionClient::SendAudioToSpeechRecognitionService(
  if (IsSpeechRecognitionAvailable()) {
    speech_recognition_recognizer_->SendAudioToSpeechRecognitionService(
        std::move(audio_data));
+    // When the speech recognition client receives speech, it alerts the
+    // live caption host in the browser that it is ready so that the UI can
+    // display a message. This happens at the time of playing the video and not
+    // at the time of construction of this object.
+    if (!on_ready_message_sent_to_caption_host_) {
+      caption_host_->OnSpeechRecognitionReady(
+          base::BindOnce(&ChromeSpeechRecognitionClient::OnBrowserCallback,
+                         base::Unretained(this)));
+      on_ready_message_sent_to_caption_host_ = true;
+    }
  }
 }

--- a/chrome/renderer/media/chrome_speech_recognition_client.h
+++ b/chrome/renderer/media/chrome_speech_recognition_client.h
@@ -65,8 +65,8 @@ class ChromeSpeechRecognitionClient
  media::mojom::AudioDataS16Ptr ConvertToAudioDataS16(
      scoped_refptr<media::AudioBuffer> buffer);
-  // Called as a response to sending a transcription to the browser.
+  // Called as a response to sending a message to the browser.
-  void OnTranscriptionCallback(bool success);
+  void OnBrowserCallback(bool success);
  media::mojom::AudioDataS16Ptr ConvertToAudioDataS16(
      std::unique_ptr<media::AudioBus> audio_bus,
@@ -108,6 +108,9 @@ class ChromeSpeechRecognitionClient
  bool is_recognizer_bound_ = false;
+  // Whether or not the on ready message has been sent to the caption host.
+  bool on_ready_message_sent_to_caption_host_ = false;
  // The temporary audio bus used to mix multichannel audio into a single
  // channel.
  std::unique_ptr<media::AudioBus> monaural_audio_bus_;