Commit 80f9ca10 authored by Abigail Klein, committed by Commit Bot

[Live Caption] Stop speech recognition when caption bubble is closed.

Introduce a reply to the OnTranscription function of the live caption
mojom interface. The reply is a success boolean that denotes whether
the browser successfully received, routed, and set the transcription
result. If it did not succeed (because the caption bubble was closed,
the browser was closed, or the associated web contents did not match an
existing browser with a caption bubble), the OnTranscription mojo
function returns false, causing speech recognition to stop.

Bug: 1055150
Change-Id: I8c8e0b443c8c90afa78e85e81e3ca7cda8a49920
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2225878
Reviewed-by: Avi Drissman <avi@chromium.org>
Reviewed-by: Katie Dektar <katie@chromium.org>
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Reviewed-by: Alex Gough <ajgo@chromium.org>
Commit-Queue: Abigail Klein <abigailbklein@google.com>
Cr-Commit-Position: refs/heads/master@{#776086}
parent 66dced4d
...@@ -163,14 +163,14 @@ void CaptionController::OnBrowserRemoved(Browser* browser) { ...@@ -163,14 +163,14 @@ void CaptionController::OnBrowserRemoved(Browser* browser) {
caption_bubble_controllers_.erase(browser); caption_bubble_controllers_.erase(browser);
} }
void CaptionController::DispatchTranscription( bool CaptionController::DispatchTranscription(
content::WebContents* web_contents, content::WebContents* web_contents,
const chrome::mojom::TranscriptionResultPtr& transcription_result) { const chrome::mojom::TranscriptionResultPtr& transcription_result) {
Browser* browser = chrome::FindBrowserWithWebContents(web_contents); Browser* browser = chrome::FindBrowserWithWebContents(web_contents);
if (!browser || !caption_bubble_controllers_.count(browser)) if (!browser || !caption_bubble_controllers_.count(browser))
return; return false;
caption_bubble_controllers_[browser]->OnTranscription(transcription_result, return caption_bubble_controllers_[browser]->OnTranscription(
web_contents); transcription_result, web_contents);
} }
CaptionBubbleController* CaptionBubbleController*
......
...@@ -68,8 +68,9 @@ class CaptionController : public BrowserListObserver, public KeyedService { ...@@ -68,8 +68,9 @@ class CaptionController : public BrowserListObserver, public KeyedService {
void Init(); void Init();
// Routes a transcription to the CaptionBubbleController that belongs to the // Routes a transcription to the CaptionBubbleController that belongs to the
// appropriate browser. // appropriate browser. Returns whether the transcription result was routed
void DispatchTranscription( // successfully. Transcriptions will halt if this returns false.
bool DispatchTranscription(
content::WebContents* web_contents, content::WebContents* web_contents,
const chrome::mojom::TranscriptionResultPtr& transcription_result); const chrome::mojom::TranscriptionResultPtr& transcription_result);
......
...@@ -40,21 +40,28 @@ CaptionHostImpl::CaptionHostImpl(content::RenderFrameHost* frame_host) ...@@ -40,21 +40,28 @@ CaptionHostImpl::CaptionHostImpl(content::RenderFrameHost* frame_host)
CaptionHostImpl::~CaptionHostImpl() = default; CaptionHostImpl::~CaptionHostImpl() = default;
void CaptionHostImpl::OnTranscription( void CaptionHostImpl::OnTranscription(
chrome::mojom::TranscriptionResultPtr transcription_result) { chrome::mojom::TranscriptionResultPtr transcription_result,
if (!frame_host_) OnTranscriptionCallback reply) {
if (!frame_host_) {
std::move(reply).Run(false);
return; return;
}
content::WebContents* web_contents = content::WebContents* web_contents =
content::WebContents::FromRenderFrameHost(frame_host_); content::WebContents::FromRenderFrameHost(frame_host_);
if (!web_contents) { if (!web_contents) {
frame_host_ = nullptr; frame_host_ = nullptr;
std::move(reply).Run(false);
return; return;
} }
Profile* profile = Profile* profile =
Profile::FromBrowserContext(web_contents->GetBrowserContext()); Profile::FromBrowserContext(web_contents->GetBrowserContext());
if (!profile) if (!profile) {
std::move(reply).Run(false);
return; return;
CaptionControllerFactory::GetForProfile(profile)->DispatchTranscription( }
web_contents, transcription_result); std::move(reply).Run(
CaptionControllerFactory::GetForProfile(profile)->DispatchTranscription(
web_contents, transcription_result));
} }
void CaptionHostImpl::RenderFrameDeleted(content::RenderFrameHost* frame_host) { void CaptionHostImpl::RenderFrameDeleted(content::RenderFrameHost* frame_host) {
......
...@@ -38,7 +38,8 @@ class CaptionHostImpl : public chrome::mojom::CaptionHost, ...@@ -38,7 +38,8 @@ class CaptionHostImpl : public chrome::mojom::CaptionHost,
// chrome::mojom::CaptionHost: // chrome::mojom::CaptionHost:
void OnTranscription( void OnTranscription(
chrome::mojom::TranscriptionResultPtr transcription_result) override; chrome::mojom::TranscriptionResultPtr transcription_result,
OnTranscriptionCallback reply) override;
// content::WebContentsObserver: // content::WebContentsObserver:
void RenderFrameDeleted(content::RenderFrameHost* frame_host) override; void RenderFrameDeleted(content::RenderFrameHost* frame_host) override;
......
...@@ -37,14 +37,16 @@ class CaptionBubbleController { ...@@ -37,14 +37,16 @@ class CaptionBubbleController {
static std::unique_ptr<CaptionBubbleController> Create(Browser* browser); static std::unique_ptr<CaptionBubbleController> Create(Browser* browser);
// Called when a transcription is received from the service. // Called when a transcription is received from the service. Returns whether
virtual void OnTranscription( // the transcription result was set on the caption bubble successfully.
// Transcriptions will halt if this returns false.
virtual bool OnTranscription(
const chrome::mojom::TranscriptionResultPtr& transcription_result, const chrome::mojom::TranscriptionResultPtr& transcription_result,
content::WebContents* web_contents) {} content::WebContents* web_contents) = 0;
// Called when the caption style changes. // Called when the caption style changes.
virtual void UpdateCaptionStyle( virtual void UpdateCaptionStyle(
base::Optional<ui::CaptionStyle> caption_style) {} base::Optional<ui::CaptionStyle> caption_style) = 0;
}; };
} // namespace captions } // namespace captions
......
...@@ -69,7 +69,7 @@ CaptionBubbleControllerViews::~CaptionBubbleControllerViews() { ...@@ -69,7 +69,7 @@ CaptionBubbleControllerViews::~CaptionBubbleControllerViews() {
void CaptionBubbleControllerViews::OnCaptionBubbleCloseClicked() { void CaptionBubbleControllerViews::OnCaptionBubbleCloseClicked() {
// Hide the caption bubble on the active tab. // Hide the caption bubble on the active tab.
caption_texts_[active_contents_].clear(); caption_bubble_models_[active_contents_].close();
// TODO(crbug.com/1051150): Ensure that caption bubble disappears on the tab // TODO(crbug.com/1051150): Ensure that caption bubble disappears on the tab
// if it is currently displaying an error message. // if it is currently displaying an error message.
SetCaptionBubbleText(); SetCaptionBubbleText();
...@@ -84,13 +84,14 @@ void CaptionBubbleControllerViews::OnCaptionBubbleDestroyed() { ...@@ -84,13 +84,14 @@ void CaptionBubbleControllerViews::OnCaptionBubbleDestroyed() {
browser_ = nullptr; browser_ = nullptr;
} }
void CaptionBubbleControllerViews::OnTranscription( bool CaptionBubbleControllerViews::OnTranscription(
const chrome::mojom::TranscriptionResultPtr& transcription_result, const chrome::mojom::TranscriptionResultPtr& transcription_result,
content::WebContents* web_contents) { content::WebContents* web_contents) {
if (!caption_bubble_) if (!caption_bubble_ || caption_bubble_models_[web_contents].is_closed)
return; return false;
std::string& partial_text = caption_texts_[web_contents].partial_text;
std::string& final_text = caption_texts_[web_contents].final_text; std::string& partial_text = caption_bubble_models_[web_contents].partial_text;
std::string& final_text = caption_bubble_models_[web_contents].final_text;
partial_text = transcription_result->transcription; partial_text = transcription_result->transcription;
SetCaptionBubbleText(); SetCaptionBubbleText();
...@@ -117,6 +118,7 @@ void CaptionBubbleControllerViews::OnTranscription( ...@@ -117,6 +118,7 @@ void CaptionBubbleControllerViews::OnTranscription(
SetCaptionBubbleText(); SetCaptionBubbleText();
} }
} }
return true;
} }
void CaptionBubbleControllerViews::OnTabStripModelChanged( void CaptionBubbleControllerViews::OnTabStripModelChanged(
...@@ -128,7 +130,7 @@ void CaptionBubbleControllerViews::OnTabStripModelChanged( ...@@ -128,7 +130,7 @@ void CaptionBubbleControllerViews::OnTabStripModelChanged(
if (!selection.active_tab_changed()) if (!selection.active_tab_changed())
return; return;
if (selection.selected_tabs_were_removed) if (selection.selected_tabs_were_removed)
caption_texts_.erase(selection.old_contents); caption_bubble_models_.erase(selection.old_contents);
active_contents_ = selection.new_contents; active_contents_ = selection.new_contents;
SetCaptionBubbleText(); SetCaptionBubbleText();
...@@ -136,8 +138,8 @@ void CaptionBubbleControllerViews::OnTabStripModelChanged( ...@@ -136,8 +138,8 @@ void CaptionBubbleControllerViews::OnTabStripModelChanged(
void CaptionBubbleControllerViews::SetCaptionBubbleText() { void CaptionBubbleControllerViews::SetCaptionBubbleText() {
std::string text; std::string text;
if (active_contents_ && caption_texts_.count(active_contents_)) if (active_contents_ && caption_bubble_models_.count(active_contents_))
text = caption_texts_[active_contents_].full_text(); text = caption_bubble_models_[active_contents_].full_text();
caption_bubble_->SetText(text); caption_bubble_->SetText(text);
} }
......
...@@ -20,13 +20,15 @@ namespace captions { ...@@ -20,13 +20,15 @@ namespace captions {
class CaptionBubble; class CaptionBubble;
struct CaptionText { struct CaptionBubbleModel {
std::string final_text; std::string final_text;
std::string partial_text; std::string partial_text;
bool is_closed = false;
void clear() { void close() {
final_text.clear(); final_text.clear();
partial_text.clear(); partial_text.clear();
is_closed = true;
} }
std::string full_text() { return final_text + partial_text; } std::string full_text() { return final_text + partial_text; }
...@@ -48,8 +50,10 @@ class CaptionBubbleControllerViews : public CaptionBubbleController, ...@@ -48,8 +50,10 @@ class CaptionBubbleControllerViews : public CaptionBubbleController,
CaptionBubbleControllerViews& operator=(const CaptionBubbleControllerViews&) = CaptionBubbleControllerViews& operator=(const CaptionBubbleControllerViews&) =
delete; delete;
// Called when a transcription is received from the service. // Called when a transcription is received from the service. Returns whether
void OnTranscription( // the transcription result was set on the caption bubble successfully.
// Transcriptions will halt if this returns false.
bool OnTranscription(
const chrome::mojom::TranscriptionResultPtr& transcription_result, const chrome::mojom::TranscriptionResultPtr& transcription_result,
content::WebContents* web_contents) override; content::WebContents* web_contents) override;
...@@ -93,7 +97,8 @@ class CaptionBubbleControllerViews : public CaptionBubbleController, ...@@ -93,7 +97,8 @@ class CaptionBubbleControllerViews : public CaptionBubbleController,
// final texts in order to show the latest partial text to a user when they // final texts in order to show the latest partial text to a user when they
// switch back to the tab in case the speech service has not sent a final // switch back to the tab in case the speech service has not sent a final
// transcription in a while. // transcription in a while.
std::unordered_map<content::WebContents*, CaptionText> caption_texts_; std::unordered_map<content::WebContents*, CaptionBubbleModel>
caption_bubble_models_;
}; };
} // namespace captions } // namespace captions
......
...@@ -106,14 +106,14 @@ class CaptionBubbleControllerViewsTest : public InProcessBrowserTest { ...@@ -106,14 +106,14 @@ class CaptionBubbleControllerViewsTest : public InProcessBrowserTest {
EXPECT_EQ(bubble_bounds.bottom(), anchor_bounds.bottom() - 48); EXPECT_EQ(bubble_bounds.bottom(), anchor_bounds.bottom() - 48);
} }
void OnPartialTranscription(std::string text, int tab_index = 0) { bool OnPartialTranscription(std::string text, int tab_index = 0) {
GetController()->OnTranscription( return GetController()->OnTranscription(
chrome::mojom::TranscriptionResult::New(text, false), chrome::mojom::TranscriptionResult::New(text, false),
browser()->tab_strip_model()->GetWebContentsAt(tab_index)); browser()->tab_strip_model()->GetWebContentsAt(tab_index));
} }
void OnFinalTranscription(std::string text, int tab_index = 0) { bool OnFinalTranscription(std::string text, int tab_index = 0) {
GetController()->OnTranscription( return GetController()->OnTranscription(
chrome::mojom::TranscriptionResult::New(text, true), chrome::mojom::TranscriptionResult::New(text, true),
browser()->tab_strip_model()->GetWebContentsAt(tab_index)); browser()->tab_strip_model()->GetWebContentsAt(tab_index));
} }
...@@ -343,12 +343,21 @@ IN_PROC_BROWSER_TEST_F(CaptionBubbleControllerViewsTest, ShowsAndHidesError) { ...@@ -343,12 +343,21 @@ IN_PROC_BROWSER_TEST_F(CaptionBubbleControllerViewsTest, ShowsAndHidesError) {
} }
IN_PROC_BROWSER_TEST_F(CaptionBubbleControllerViewsTest, CloseButtonCloses) { IN_PROC_BROWSER_TEST_F(CaptionBubbleControllerViewsTest, CloseButtonCloses) {
OnPartialTranscription("Elephants have 3-4 toenails per foot"); bool success = OnFinalTranscription("Elephants have 3-4 toenails per foot");
EXPECT_TRUE(success);
EXPECT_TRUE(GetCaptionWidget()); EXPECT_TRUE(GetCaptionWidget());
EXPECT_TRUE(GetCaptionWidget()->IsVisible()); EXPECT_TRUE(GetCaptionWidget()->IsVisible());
EXPECT_EQ("Elephants have 3-4 toenails per foot", GetLabelText());
ClickCloseButton(); ClickCloseButton();
EXPECT_TRUE(GetCaptionWidget()); EXPECT_TRUE(GetCaptionWidget());
EXPECT_FALSE(GetCaptionWidget()->IsVisible()); EXPECT_FALSE(GetCaptionWidget()->IsVisible());
success = OnFinalTranscription(
"Elephants wander 35 miles a day in search of water");
EXPECT_FALSE(success);
EXPECT_EQ("", GetLabelText());
// TODO(crbug.com/1055150): The caption bubble should reappear when the tab
// refreshes.
} }
IN_PROC_BROWSER_TEST_F(CaptionBubbleControllerViewsTest, IN_PROC_BROWSER_TEST_F(CaptionBubbleControllerViewsTest,
......
...@@ -8,8 +8,9 @@ module chrome.mojom; ...@@ -8,8 +8,9 @@ module chrome.mojom;
// the Live Caption feature. // the Live Caption feature.
interface CaptionHost { interface CaptionHost {
// Called when the speech recognition client receives a transcription from the // Called when the speech recognition client receives a transcription from the
// speech service. // speech service. Returns whether the transcription result was received
OnTranscription(TranscriptionResult transcription_result); // successfully. Transcriptions will halt if this returns false.
OnTranscription(TranscriptionResult transcription_result) => (bool success);
}; };
// A transcription result created by the speech recognition client in the // A transcription result created by the speech recognition client in the
......
...@@ -36,14 +36,22 @@ void ChromeSpeechRecognitionClient::AddAudio( ...@@ -36,14 +36,22 @@ void ChromeSpeechRecognitionClient::AddAudio(
} }
bool ChromeSpeechRecognitionClient::IsSpeechRecognitionAvailable() { bool ChromeSpeechRecognitionClient::IsSpeechRecognitionAvailable() {
return speech_recognition_recognizer_.is_bound() && return is_browser_requesting_transcription_ &&
speech_recognition_recognizer_.is_bound() &&
speech_recognition_recognizer_.is_connected(); speech_recognition_recognizer_.is_connected();
} }
void ChromeSpeechRecognitionClient::OnSpeechRecognitionRecognitionEvent( void ChromeSpeechRecognitionClient::OnSpeechRecognitionRecognitionEvent(
media::mojom::SpeechRecognitionResultPtr result) { media::mojom::SpeechRecognitionResultPtr result) {
caption_host_->OnTranscription(chrome::mojom::TranscriptionResult::New( caption_host_->OnTranscription(
result->transcription, result->is_final)); chrome::mojom::TranscriptionResult::New(result->transcription,
result->is_final),
base::BindOnce(&ChromeSpeechRecognitionClient::OnTranscriptionCallback,
base::Unretained(this)));
}
void ChromeSpeechRecognitionClient::OnTranscriptionCallback(bool success) {
is_browser_requesting_transcription_ = success;
} }
media::mojom::AudioDataS16Ptr media::mojom::AudioDataS16Ptr
......
...@@ -41,6 +41,9 @@ class ChromeSpeechRecognitionClient ...@@ -41,6 +41,9 @@ class ChromeSpeechRecognitionClient
media::mojom::AudioDataS16Ptr ConvertToAudioDataS16( media::mojom::AudioDataS16Ptr ConvertToAudioDataS16(
scoped_refptr<media::AudioBuffer> buffer); scoped_refptr<media::AudioBuffer> buffer);
// Called as a response to sending a transcription to the browser.
void OnTranscriptionCallback(bool success);
mojo::Remote<media::mojom::SpeechRecognitionContext> mojo::Remote<media::mojom::SpeechRecognitionContext>
speech_recognition_context_; speech_recognition_context_;
mojo::Remote<media::mojom::SpeechRecognitionRecognizer> mojo::Remote<media::mojom::SpeechRecognitionRecognizer>
...@@ -52,6 +55,9 @@ class ChromeSpeechRecognitionClient ...@@ -52,6 +55,9 @@ class ChromeSpeechRecognitionClient
// The temporary audio bus used to convert the raw audio to the appropriate // The temporary audio bus used to convert the raw audio to the appropriate
// format. // format.
std::unique_ptr<media::AudioBus> temp_audio_bus_; std::unique_ptr<media::AudioBus> temp_audio_bus_;
// Whether the browser is still requesting transcriptions.
bool is_browser_requesting_transcription_ = true;
}; };
#endif // CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_ #endif // CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment