Commit b450e909 authored by primiano@chromium.org's avatar primiano@chromium.org

Speech refactoring: Reimplemented SpeechRecognitionManagerImpl as a FSM. (CL1.7)


BUG=116954
TEST=none.


Review URL: http://codereview.chromium.org/9972011

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@133967 0039d316-1c4b-4281-b951-d872f2087c98
parent 2e526f05
...@@ -16,8 +16,11 @@ ...@@ -16,8 +16,11 @@
#include "chrome/browser/tab_contents/tab_util.h" #include "chrome/browser/tab_contents/tab_util.h"
#include "chrome/common/pref_names.h" #include "chrome/common/pref_names.h"
#include "content/public/browser/browser_thread.h" #include "content/public/browser/browser_thread.h"
#include "content/public/browser/render_view_host.h"
#include "content/public/browser/render_view_host_delegate.h"
#include "content/public/browser/resource_context.h" #include "content/public/browser/resource_context.h"
#include "content/public/browser/speech_recognition_manager.h" #include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/speech_recognition_error.h" #include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h" #include "content/public/common/speech_recognition_result.h"
#include "grit/generated_resources.h" #include "grit/generated_resources.h"
...@@ -29,6 +32,7 @@ ...@@ -29,6 +32,7 @@
using content::BrowserThread; using content::BrowserThread;
using content::SpeechRecognitionManager; using content::SpeechRecognitionManager;
using content::SpeechRecognitionSessionContext;
namespace speech { namespace speech {
...@@ -105,17 +109,18 @@ ChromeSpeechRecognitionManagerDelegate:: ...@@ -105,17 +109,18 @@ ChromeSpeechRecognitionManagerDelegate::
} }
void ChromeSpeechRecognitionManagerDelegate::ShowRecognitionRequested( void ChromeSpeechRecognitionManagerDelegate::ShowRecognitionRequested(
int session_id, int session_id) {
int render_process_id, const SpeechRecognitionSessionContext& context =
int render_view_id, SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
const gfx::Rect& element_rect) { bubble_controller_->CreateBubble(session_id,
bubble_controller_->CreateBubble(session_id, render_process_id, context.render_process_id,
render_view_id, element_rect); context.render_view_id,
context.element_rect);
} }
void ChromeSpeechRecognitionManagerDelegate::GetRequestInfo( void ChromeSpeechRecognitionManagerDelegate::GetDiagnosticInformation(
bool* can_report_metrics, bool* can_report_metrics,
std::string* request_info) { std::string* hardware_info) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
if (!optional_request_info_.get()) { if (!optional_request_info_.get()) {
optional_request_info_ = new OptionalRequestInfo(); optional_request_info_ = new OptionalRequestInfo();
...@@ -129,7 +134,24 @@ void ChromeSpeechRecognitionManagerDelegate::GetRequestInfo( ...@@ -129,7 +134,24 @@ void ChromeSpeechRecognitionManagerDelegate::GetRequestInfo(
optional_request_info_->Refresh(); optional_request_info_->Refresh();
} }
*can_report_metrics = optional_request_info_->can_report_metrics(); *can_report_metrics = optional_request_info_->can_report_metrics();
*request_info = optional_request_info_->value(); *hardware_info = optional_request_info_->value();
}
void ChromeSpeechRecognitionManagerDelegate::CheckRecognitionIsAllowed(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
const SpeechRecognitionSessionContext& context =
SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
// The check must be performed in the UI thread. We defer it posting to
// CheckRenderViewType, which will issue the callback on our behalf.
BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
base::Bind(&CheckRenderViewType,
session_id,
callback,
context.render_process_id,
context.render_view_id));
} }
void ChromeSpeechRecognitionManagerDelegate::ShowWarmUp(int session_id) { void ChromeSpeechRecognitionManagerDelegate::ShowWarmUp(int session_id) {
...@@ -149,51 +171,38 @@ void ChromeSpeechRecognitionManagerDelegate::ShowInputVolume( ...@@ -149,51 +171,38 @@ void ChromeSpeechRecognitionManagerDelegate::ShowInputVolume(
bubble_controller_->SetBubbleInputVolume(session_id, volume, noise_volume); bubble_controller_->SetBubbleInputVolume(session_id, volume, noise_volume);
} }
void ChromeSpeechRecognitionManagerDelegate::ShowMicError(int session_id, void ChromeSpeechRecognitionManagerDelegate::ShowError(
MicError error) { int session_id, const content::SpeechRecognitionError& error) {
switch (error) { int error_message_id = 0;
case MIC_ERROR_NO_DEVICE_AVAILABLE: switch (error.code) {
bubble_controller_->SetBubbleMessage( case content::SPEECH_RECOGNITION_ERROR_AUDIO:
session_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC)); switch (error.details) {
case content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC:
error_message_id = IDS_SPEECH_INPUT_NO_MIC;
break;
case content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE:
error_message_id = IDS_SPEECH_INPUT_MIC_IN_USE;
break;
default:
error_message_id = IDS_SPEECH_INPUT_MIC_ERROR;
break;
}
break; break;
case content::SPEECH_RECOGNITION_ERROR_NO_SPEECH:
case MIC_ERROR_DEVICE_IN_USE: error_message_id = IDS_SPEECH_INPUT_NO_SPEECH;
bubble_controller_->SetBubbleMessage( break;
session_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_MIC_IN_USE)); case content::SPEECH_RECOGNITION_ERROR_NO_MATCH:
error_message_id = IDS_SPEECH_INPUT_NO_RESULTS;
break;
case content::SPEECH_RECOGNITION_ERROR_NETWORK:
error_message_id = IDS_SPEECH_INPUT_NET_ERROR;
break; break;
default: default:
NOTREACHED(); NOTREACHED() << "unknown error " << error.code;
}
}
void ChromeSpeechRecognitionManagerDelegate::ShowRecognizerError(
int session_id, content::SpeechRecognitionErrorCode error) {
struct ErrorMessageMapEntry {
content::SpeechRecognitionErrorCode error;
int message_id;
};
ErrorMessageMapEntry error_message_map[] = {
{
content::SPEECH_RECOGNITION_ERROR_AUDIO, IDS_SPEECH_INPUT_MIC_ERROR
}, {
content::SPEECH_RECOGNITION_ERROR_NO_SPEECH, IDS_SPEECH_INPUT_NO_SPEECH
}, {
content::SPEECH_RECOGNITION_ERROR_NO_MATCH, IDS_SPEECH_INPUT_NO_RESULTS
}, {
content::SPEECH_RECOGNITION_ERROR_NETWORK, IDS_SPEECH_INPUT_NET_ERROR
}
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(error_message_map); ++i) {
if (error_message_map[i].error == error) {
bubble_controller_->SetBubbleMessage(
session_id,
l10n_util::GetStringUTF16(error_message_map[i].message_id));
return; return;
}
} }
bubble_controller_->SetBubbleMessage(
NOTREACHED() << "unknown error " << error; session_id, l10n_util::GetStringUTF16(error_message_id));
} }
void ChromeSpeechRecognitionManagerDelegate::DoClose(int session_id) { void ChromeSpeechRecognitionManagerDelegate::DoClose(int session_id) {
...@@ -205,18 +214,40 @@ void ChromeSpeechRecognitionManagerDelegate::InfoBubbleButtonClicked( ...@@ -205,18 +214,40 @@ void ChromeSpeechRecognitionManagerDelegate::InfoBubbleButtonClicked(
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
if (button == SpeechRecognitionBubble::BUTTON_CANCEL) { if (button == SpeechRecognitionBubble::BUTTON_CANCEL) {
SpeechRecognitionManager::GetInstance()->CancelRecognitionForRequest( SpeechRecognitionManager::GetInstance()->AbortSession(session_id);
session_id);
} else if (button == SpeechRecognitionBubble::BUTTON_TRY_AGAIN) { } else if (button == SpeechRecognitionBubble::BUTTON_TRY_AGAIN) {
SpeechRecognitionManager::GetInstance()->StartRecognitionForRequest( SpeechRecognitionManager::GetInstance()->StartSession(session_id);
session_id);
} }
} }
void ChromeSpeechRecognitionManagerDelegate::InfoBubbleFocusChanged( void ChromeSpeechRecognitionManagerDelegate::InfoBubbleFocusChanged(
int session_id) { int session_id) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
SpeechRecognitionManager::GetInstance()->FocusLostForRequest(session_id); SpeechRecognitionManager::GetInstance()->SendSessionToBackground(session_id);
}
void ChromeSpeechRecognitionManagerDelegate::CheckRenderViewType(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback,
int render_process_id,
int render_view_id) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
const content::RenderViewHost* render_view_host =
content::RenderViewHost::FromID(render_process_id, render_view_id);
// For host delegates other than VIEW_TYPE_WEB_CONTENTS we can't reliably show
// a popup, including the speech input bubble. In these cases for privacy
// reasons we don't want to start recording if the user can't be properly
// notified. An example of this is trying to show the speech input bubble
// within an extension popup: http://crbug.com/92083. In these situations the
// speech input extension API should be used instead.
const bool allowed = (render_view_host != NULL &&
render_view_host->GetDelegate() != NULL &&
render_view_host->GetDelegate()->GetRenderViewType() ==
content::VIEW_TYPE_WEB_CONTENTS);
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
base::Bind(callback, session_id, allowed));
} }
} // namespace speech } // namespace speech
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
namespace speech { namespace speech {
// This is Chrome's implementation of the SpeechRecognitionManager interface. // This is Chrome's implementation of the SpeechRecognitionManagerDelegate
// This class is a singleton and accessed via the Get method. // interface.
class ChromeSpeechRecognitionManagerDelegate class ChromeSpeechRecognitionManagerDelegate
: NON_EXPORTED_BASE(public content::SpeechRecognitionManagerDelegate), : NON_EXPORTED_BASE(public content::SpeechRecognitionManagerDelegate),
public SpeechRecognitionBubbleControllerDelegate { public SpeechRecognitionBubbleControllerDelegate {
...@@ -28,27 +28,33 @@ class ChromeSpeechRecognitionManagerDelegate ...@@ -28,27 +28,33 @@ class ChromeSpeechRecognitionManagerDelegate
protected: protected:
// SpeechRecognitionManagerDelegate methods. // SpeechRecognitionManagerDelegate methods.
virtual void GetRequestInfo(bool* can_report_metrics, virtual void GetDiagnosticInformation(bool* can_report_metrics,
std::string* request_info) OVERRIDE; std::string* hardware_info) OVERRIDE;
virtual void ShowRecognitionRequested(int session_id, virtual void CheckRecognitionIsAllowed(
int render_process_id, int session_id,
int render_view_id, base::Callback<void(int session_id, bool is_allowed)> callback) OVERRIDE;
const gfx::Rect& element_rect) OVERRIDE; virtual void ShowRecognitionRequested(int session_id) OVERRIDE;
virtual void ShowWarmUp(int session_id) OVERRIDE; virtual void ShowWarmUp(int session_id) OVERRIDE;
virtual void ShowRecognizing(int session_id) OVERRIDE; virtual void ShowRecognizing(int session_id) OVERRIDE;
virtual void ShowRecording(int session_id) OVERRIDE; virtual void ShowRecording(int session_id) OVERRIDE;
virtual void ShowInputVolume(int session_id, virtual void ShowInputVolume(int session_id,
float volume, float volume,
float noise_volume) OVERRIDE; float noise_volume) OVERRIDE;
virtual void ShowMicError(int session_id, virtual void ShowError(int session_id,
MicError error) OVERRIDE; const content::SpeechRecognitionError& error) OVERRIDE;
virtual void ShowRecognizerError(
int session_id, content::SpeechRecognitionErrorCode error) OVERRIDE;
virtual void DoClose(int session_id) OVERRIDE; virtual void DoClose(int session_id) OVERRIDE;
private: private:
class OptionalRequestInfo; class OptionalRequestInfo;
// Checks for VIEW_TYPE_WEB_CONTENTS host in the UI thread and notifies back
// the result in the IO thread through |callback|.
static void CheckRenderViewType(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback,
int render_process_id,
int render_view_id);
scoped_refptr<SpeechRecognitionBubbleController> bubble_controller_; scoped_refptr<SpeechRecognitionBubbleController> bubble_controller_;
scoped_refptr<OptionalRequestInfo> optional_request_info_; scoped_refptr<OptionalRequestInfo> optional_request_info_;
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "base/memory/scoped_ptr.h" #include "base/memory/scoped_ptr.h"
#include "content/common/content_export.h" #include "content/common/content_export.h"
#include "content/public/browser/browser_message_filter.h" #include "content/public/browser/browser_message_filter.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "net/url_request/url_request_context_getter.h" #include "net/url_request/url_request_context_getter.h"
struct InputTagSpeechHostMsg_StartRecognition_Params; struct InputTagSpeechHostMsg_StartRecognition_Params;
...@@ -17,32 +18,37 @@ class SpeechRecognitionPreferences; ...@@ -17,32 +18,37 @@ class SpeechRecognitionPreferences;
struct SpeechRecognitionResult; struct SpeechRecognitionResult;
} }
namespace media {
class AudioManager;
}
namespace speech { namespace speech {
class SpeechRecognitionManagerImpl; class SpeechRecognitionManagerImpl;
// InputTagSpeechDispatcherHost is a delegate for Speech API messages used by // InputTagSpeechDispatcherHost is a delegate for Speech API messages used by
// RenderMessageFilter. // RenderMessageFilter. Basically it acts as a proxy, relaying the events coming
// It's the complement of InputTagSpeechDispatcher (owned by RenderView). // from the SpeechRecognitionManager to IPC messages (and vice versa).
// It's the complement of SpeechRecognitionDispatcher (owned by RenderView).
class CONTENT_EXPORT InputTagSpeechDispatcherHost class CONTENT_EXPORT InputTagSpeechDispatcherHost
: public content::BrowserMessageFilter { : public content::BrowserMessageFilter,
public content::SpeechRecognitionEventListener {
public: public:
class Sessions;
InputTagSpeechDispatcherHost( InputTagSpeechDispatcherHost(
int render_process_id, int render_process_id,
net::URLRequestContextGetter* context_getter, net::URLRequestContextGetter* url_request_context_getter,
content::SpeechRecognitionPreferences* recognition_preferences); content::SpeechRecognitionPreferences* recognition_preferences);
// Methods called by SpeechRecognitionManagerImpl. // SpeechRecognitionEventListener methods.
void SetRecognitionResult(int session_id, virtual void OnRecognitionStart(int session_id) OVERRIDE;
const content::SpeechRecognitionResult& result); virtual void OnAudioStart(int session_id) OVERRIDE;
void DidCompleteRecording(int session_id); virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;
void DidCompleteRecognition(int session_id); virtual void OnSoundStart(int session_id) OVERRIDE;
virtual void OnSoundEnd(int session_id) OVERRIDE;
virtual void OnAudioEnd(int session_id) OVERRIDE;
virtual void OnRecognitionEnd(int session_id) OVERRIDE;
virtual void OnRecognitionResult(
int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
virtual void OnRecognitionError(
int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
virtual void OnAudioLevelsChange(
int session_id, float volume, float noise_volume) OVERRIDE;
// content::BrowserMessageFilter implementation. // content::BrowserMessageFilter implementation.
virtual bool OnMessageReceived(const IPC::Message& message, virtual bool OnMessageReceived(const IPC::Message& message,
...@@ -66,7 +72,7 @@ class CONTENT_EXPORT InputTagSpeechDispatcherHost ...@@ -66,7 +72,7 @@ class CONTENT_EXPORT InputTagSpeechDispatcherHost
int render_process_id_; int render_process_id_;
bool may_have_pending_requests_; // Set if we received any speech IPC request bool may_have_pending_requests_; // Set if we received any speech IPC request
scoped_refptr<net::URLRequestContextGetter> context_getter_; scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
scoped_refptr<content::SpeechRecognitionPreferences> recognition_preferences_; scoped_refptr<content::SpeechRecognitionPreferences> recognition_preferences_;
static SpeechRecognitionManagerImpl* manager_; static SpeechRecognitionManagerImpl* manager_;
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "base/bind.h" #include "base/bind.h"
#include "base/command_line.h" #include "base/command_line.h"
#include "base/file_path.h" #include "base/file_path.h"
#include "base/memory/scoped_ptr.h"
#include "base/string_number_conversions.h" #include "base/string_number_conversions.h"
#include "base/synchronization/waitable_event.h" #include "base/synchronization/waitable_event.h"
#include "base/utf_string_conversions.h" #include "base/utf_string_conversions.h"
...@@ -16,11 +17,15 @@ ...@@ -16,11 +17,15 @@
#include "content/browser/speech/speech_recognition_manager_impl.h" #include "content/browser/speech/speech_recognition_manager_impl.h"
#include "content/browser/web_contents/web_contents_impl.h" #include "content/browser/web_contents/web_contents_impl.h"
#include "content/public/browser/notification_types.h" #include "content/public/browser/notification_types.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/content_switches.h" #include "content/public/common/content_switches.h"
#include "content/public/common/speech_recognition_error.h" #include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h" #include "content/public/common/speech_recognition_result.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebInputEvent.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebInputEvent.h"
using content::SpeechRecognitionEventListener;
using content::SpeechRecognitionSessionContext;
using content::NavigationController; using content::NavigationController;
using content::WebContents; using content::WebContents;
...@@ -36,7 +41,7 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl { ...@@ -36,7 +41,7 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
public: public:
FakeSpeechRecognitionManager() FakeSpeechRecognitionManager()
: session_id_(0), : session_id_(0),
delegate_(NULL), listener_(NULL),
did_cancel_all_(false), did_cancel_all_(false),
should_send_fake_response_(true), should_send_fake_response_(true),
recognition_started_event_(false, false) { recognition_started_event_(false, false) {
...@@ -63,23 +68,24 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl { ...@@ -63,23 +68,24 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
} }
// SpeechRecognitionManager methods. // SpeechRecognitionManager methods.
virtual void StartRecognition( virtual int CreateSession(
InputTagSpeechDispatcherHost* delegate, const content::SpeechRecognitionSessionConfig& config,
int session_id, SpeechRecognitionEventListener* event_listener) OVERRIDE {
int render_process_id, VLOG(1) << "FAKE CreateSession invoked.";
int render_view_id,
const gfx::Rect& element_rect,
const std::string& language,
const std::string& grammar,
const std::string& origin_url,
net::URLRequestContextGetter* context_getter,
content::SpeechRecognitionPreferences* recognition_prefs) OVERRIDE {
VLOG(1) << "StartRecognition invoked.";
EXPECT_EQ(0, session_id_); EXPECT_EQ(0, session_id_);
EXPECT_EQ(NULL, delegate_); EXPECT_EQ(NULL, listener_);
session_id_ = session_id; listener_ = event_listener;
delegate_ = delegate; grammar_ = config.grammar;
grammar_ = grammar; session_ctx_ = config.initial_context;
session_id_ = 1;
return session_id_;
}
virtual void StartSession(int session_id) OVERRIDE {
VLOG(1) << "FAKE StartSession invoked.";
EXPECT_EQ(session_id, session_id_);
EXPECT_TRUE(listener_ != NULL);
if (should_send_fake_response_) { if (should_send_fake_response_) {
// Give the fake result in a short while. // Give the fake result in a short while.
MessageLoop::current()->PostTask(FROM_HERE, base::Bind( MessageLoop::current()->PostTask(FROM_HERE, base::Bind(
...@@ -93,45 +99,69 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl { ...@@ -93,45 +99,69 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
} }
recognition_started_event_.Signal(); recognition_started_event_.Signal();
} }
virtual void CancelRecognition(int session_id) OVERRIDE {
VLOG(1) << "CancelRecognition invoked."; virtual void AbortSession(int session_id) OVERRIDE {
VLOG(1) << "FAKE AbortSession invoked.";
EXPECT_EQ(session_id_, session_id); EXPECT_EQ(session_id_, session_id);
session_id_ = 0; session_id_ = 0;
delegate_ = NULL; listener_ = NULL;
} }
virtual void StopRecording(int session_id) OVERRIDE {
virtual void StopAudioCaptureForSession(int session_id) OVERRIDE {
VLOG(1) << "StopRecording invoked."; VLOG(1) << "StopRecording invoked.";
EXPECT_EQ(session_id_, session_id); EXPECT_EQ(session_id_, session_id);
// Nothing to do here since we aren't really recording. // Nothing to do here since we aren't really recording.
} }
virtual void CancelAllRequestsWithDelegate(
InputTagSpeechDispatcherHost* delegate) OVERRIDE { virtual void AbortAllSessionsForListener(
content::SpeechRecognitionEventListener* listener) OVERRIDE {
VLOG(1) << "CancelAllRequestsWithDelegate invoked."; VLOG(1) << "CancelAllRequestsWithDelegate invoked.";
// delegate_ is set to NULL if a fake result was received (see below), so // listener_ is set to NULL if a fake result was received (see below), so
// check that delegate_ matches the incoming parameter only when there is // check that listener_ matches the incoming parameter only when there is
// no fake result sent. // no fake result sent.
EXPECT_TRUE(should_send_fake_response_ || delegate_ == delegate); EXPECT_TRUE(should_send_fake_response_ || listener_ == listener);
did_cancel_all_ = true; did_cancel_all_ = true;
} }
virtual void SendSessionToBackground(int session_id) OVERRIDE {}
virtual bool HasAudioInputDevices() OVERRIDE { return true; }
virtual bool IsCapturingAudio() OVERRIDE { return true; }
virtual string16 GetAudioInputDeviceModel() OVERRIDE { return string16(); }
virtual void ShowAudioInputSettings() OVERRIDE {}
virtual int LookupSessionByContext(
base::Callback<bool(
const content::SpeechRecognitionSessionContext&)> matcher)
const OVERRIDE {
bool matched = matcher.Run(session_ctx_);
return matched ? session_id_ : 0;
}
virtual content::SpeechRecognitionSessionContext GetSessionContext(
int session_id) const OVERRIDE {
EXPECT_EQ(session_id, session_id_);
return session_ctx_;
}
private: private:
void SetFakeRecognitionResult() { void SetFakeRecognitionResult() {
if (session_id_) { // Do a check in case we were cancelled.. if (session_id_) { // Do a check in case we were cancelled..
VLOG(1) << "Setting fake recognition result."; VLOG(1) << "Setting fake recognition result.";
delegate_->DidCompleteRecording(session_id_); listener_->OnAudioEnd(session_id_);
content::SpeechRecognitionResult results; content::SpeechRecognitionResult results;
results.hypotheses.push_back(content::SpeechRecognitionHypothesis( results.hypotheses.push_back(content::SpeechRecognitionHypothesis(
ASCIIToUTF16(kTestResult), 1.0)); ASCIIToUTF16(kTestResult), 1.0));
delegate_->SetRecognitionResult(session_id_, results); listener_->OnRecognitionResult(session_id_, results);
delegate_->DidCompleteRecognition(session_id_); listener_->OnRecognitionEnd(session_id_);
session_id_ = 0; session_id_ = 0;
delegate_ = NULL; listener_ = NULL;
VLOG(1) << "Finished setting fake recognition result."; VLOG(1) << "Finished setting fake recognition result.";
} }
} }
int session_id_; int session_id_;
InputTagSpeechDispatcherHost* delegate_; SpeechRecognitionEventListener* listener_;
SpeechRecognitionSessionContext session_ctx_;
std::string grammar_; std::string grammar_;
bool did_cancel_all_; bool did_cancel_all_;
bool should_send_fake_response_; bool should_send_fake_response_;
......
...@@ -9,69 +9,66 @@ ...@@ -9,69 +9,66 @@
#include <string> #include <string>
#include "base/basictypes.h" #include "base/basictypes.h"
#include "base/callback.h"
#include "base/compiler_specific.h" #include "base/compiler_specific.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/singleton.h" #include "base/memory/singleton.h"
#include "content/public/browser/speech_recognition_event_listener.h" #include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h" #include "content/public/browser/speech_recognition_manager.h"
#include "ui/gfx/rect.h" #include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/speech_recognition_error.h"
namespace content { namespace content {
class ResourceContext;
class SpeechRecognitionManagerDelegate; class SpeechRecognitionManagerDelegate;
class SpeechRecognitionPreferences;
struct SpeechRecognitionResult;
class SpeechRecognizer;
}
namespace net {
class URLRequestContextGetter;
} }
namespace speech { namespace speech {
class InputTagSpeechDispatcherHost; class SpeechRecognizerImpl;
class CONTENT_EXPORT SpeechRecognitionManagerImpl // This is the manager for speech recognition. It is a singleton instance in
: NON_EXPORTED_BASE(public content::SpeechRecognitionManager), // the browser process and can serve several requests. Each recognition request
NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) { // corresponds to a session, initiated via |CreateSession|.
// In every moment the manager has at most one "interactive" session (identified
// by |interactive_session_id_|), that is the session that is currently holding
// user attention. For privacy reasons, only the interactive session is allowed
// to capture audio from the microphone. However, after audio capture is
// completed, a session can be sent to background and can live in parallel with
// other sessions, while waiting for its results.
//
// More in details, SpeechRecognitionManager has the following responsibilities:
// - Handles requests received from various render views and makes sure only
// one of them accesses the audio device at any given time.
// - Relays recognition results/status/error events of each session to the
// corresponding listener (demuxing on the base of their session_id).
// - Handles the instantiation of SpeechRecognitionEngine objects when
// requested by SpeechRecognitionSessions.
class CONTENT_EXPORT SpeechRecognitionManagerImpl :
public NON_EXPORTED_BASE(content::SpeechRecognitionManager),
public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) {
public: public:
static SpeechRecognitionManagerImpl* GetInstance(); static SpeechRecognitionManagerImpl* GetInstance();
// SpeechRecognitionManager implementation: // SpeechRecognitionManager implementation.
virtual void StartRecognitionForRequest(int session_id) OVERRIDE; virtual int CreateSession(
virtual void CancelRecognitionForRequest(int session_id) OVERRIDE; const content::SpeechRecognitionSessionConfig& config,
virtual void FocusLostForRequest(int session_id) OVERRIDE; SpeechRecognitionEventListener* event_listener) OVERRIDE;
virtual void StartSession(int session_id) OVERRIDE;
virtual void AbortSession(int session_id) OVERRIDE;
virtual void AbortAllSessionsForListener(
content::SpeechRecognitionEventListener* listener) OVERRIDE;
virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
virtual void SendSessionToBackground(int session_id) OVERRIDE;
virtual content::SpeechRecognitionSessionContext GetSessionContext(
int session_id) const OVERRIDE;
virtual int LookupSessionByContext(
base::Callback<bool(
const content::SpeechRecognitionSessionContext&)> matcher)
const OVERRIDE;
virtual bool HasAudioInputDevices() OVERRIDE; virtual bool HasAudioInputDevices() OVERRIDE;
virtual bool IsCapturingAudio() OVERRIDE; virtual bool IsCapturingAudio() OVERRIDE;
virtual string16 GetAudioInputDeviceModel() OVERRIDE; virtual string16 GetAudioInputDeviceModel() OVERRIDE;
virtual void ShowAudioInputSettings() OVERRIDE; virtual void ShowAudioInputSettings() OVERRIDE;
// Handlers for requests from render views.
// |delegate| is a weak pointer and should remain valid until
// its |DidCompleteRecognition| method is called or recognition is cancelled.
// |render_process_id| is the ID of the renderer process initiating the
// request.
// |element_rect| is the display bounds of the html element requesting speech
// input (in page coordinates).
virtual void StartRecognition(
InputTagSpeechDispatcherHost* delegate,
int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect,
const std::string& language,
const std::string& grammar,
const std::string& origin_url,
net::URLRequestContextGetter* context_getter,
content::SpeechRecognitionPreferences* speech_recognition_prefs);
virtual void CancelRecognition(int session_id);
virtual void CancelAllRequestsWithDelegate(
InputTagSpeechDispatcherHost* delegate);
virtual void StopRecording(int session_id);
// SpeechRecognitionEventListener methods. // SpeechRecognitionEventListener methods.
virtual void OnRecognitionStart(int session_id) OVERRIDE; virtual void OnRecognitionStart(int session_id) OVERRIDE;
virtual void OnAudioStart(int session_id) OVERRIDE; virtual void OnAudioStart(int session_id) OVERRIDE;
...@@ -84,8 +81,8 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl ...@@ -84,8 +81,8 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
virtual void OnRecognitionError( virtual void OnRecognitionError(
int session_id, const content::SpeechRecognitionError& error) OVERRIDE; int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
virtual void OnAudioLevelsChange( virtual void OnAudioLevelsChange(int session_id, float volume,
int session_id, float volume, float noise_volume) OVERRIDE; float noise_volume) OVERRIDE;
protected: protected:
// Private constructor to enforce singleton. // Private constructor to enforce singleton.
...@@ -93,34 +90,85 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl ...@@ -93,34 +90,85 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
SpeechRecognitionManagerImpl(); SpeechRecognitionManagerImpl();
virtual ~SpeechRecognitionManagerImpl(); virtual ~SpeechRecognitionManagerImpl();
bool HasPendingRequest(int session_id) const;
private: private:
struct Request { // Data types for the internal Finite State Machine (FSM).
Request(); enum FSMState {
~Request(); STATE_IDLE = 0,
STATE_INTERACTIVE,
STATE_BACKGROUND,
STATE_WAITING_FOR_DELETION,
STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION
};
InputTagSpeechDispatcherHost* delegate; enum FSMEvent {
scoped_refptr<content::SpeechRecognizer> recognizer; EVENT_ABORT = 0,
bool is_active; // Set to true when recording or recognition is going on. EVENT_START,
EVENT_STOP_CAPTURE,
EVENT_SET_BACKGROUND,
EVENT_RECOGNITION_ENDED,
EVENT_RECOGNITION_RESULT,
EVENT_RECOGNITION_ERROR,
EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR
}; };
struct SpeechRecognitionParams; struct Session {
Session();
~Session();
InputTagSpeechDispatcherHost* GetDelegate(int session_id) const; int id;
content::SpeechRecognitionEventListener* event_listener;
content::SpeechRecognitionSessionContext context;
scoped_refptr<SpeechRecognizerImpl> recognizer;
FSMState state;
bool error_occurred;
};
void CheckRenderViewTypeAndStartRecognition( struct FSMEventArgs {
const SpeechRecognitionParams& params); explicit FSMEventArgs(FSMEvent event_value);
void ProceedStartingRecognition(const SpeechRecognitionParams& params); ~FSMEventArgs();
void CancelRecognitionAndInformDelegate(int session_id); FSMEvent event;
content::SpeechRecognitionError speech_error;
};
typedef std::map<int, Request> SpeechRecognizerMap; // Callback issued by the SpeechRecognitionManagerDelegate for reporting
SpeechRecognizerMap requests_; // asynchronously the result of the CheckRecognitionIsAllowed call.
std::string request_info_; void RecognitionAllowedCallback(int session_id, bool is_allowed);
bool can_report_metrics_;
int recording_session_id_; // Entry point for pushing any external event into the session handling FSM.
scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_; void DispatchEvent(int session_id, FSMEventArgs args);
// Defines the behavior of the session handling FSM, selecting the appropriate
// transition according to the session, its current state and the event.
FSMState ExecuteTransitionAndGetNextState(Session& session,
const FSMEventArgs& event_args);
// The methods below handle transitions of the session handling FSM.
FSMState SessionStart(Session& session, const FSMEventArgs& event_args);
FSMState SessionAbort(Session& session, const FSMEventArgs& event_args);
FSMState SessionStopAudioCapture(Session& session,
const FSMEventArgs& event_args);
FSMState SessionAbortIfCapturingAudioOrBackground(
Session& session, const FSMEventArgs& event_args);
FSMState SessionSetBackground(Session& session,
const FSMEventArgs& event_args);
FSMState SessionReportError(Session& session, const FSMEventArgs& event_args);
FSMState SessionReportNoMatch(Session& session,
const FSMEventArgs& event_args);
FSMState SessionDelete(Session& session, const FSMEventArgs& event_args);
FSMState DoNothing(Session& session, const FSMEventArgs& event_args);
FSMState NotFeasible(Session& session, const FSMEventArgs& event_args);
bool SessionExists(int session_id) const;
content::SpeechRecognitionEventListener* GetListener(int session_id) const;
int GetNextSessionID();
typedef std::map<int, Session> SessionsTable;
SessionsTable sessions_;
int interactive_session_id_;
int last_session_id_;
bool is_dispatching_event_;
content::SpeechRecognitionManagerDelegate* delegate_;
}; };
} // namespace speech } // namespace speech
......
...@@ -133,6 +133,9 @@ ...@@ -133,6 +133,9 @@
'public/browser/speech_recognition_manager.h', 'public/browser/speech_recognition_manager.h',
'public/browser/speech_recognition_manager_delegate.h', 'public/browser/speech_recognition_manager_delegate.h',
'public/browser/speech_recognition_preferences.h', 'public/browser/speech_recognition_preferences.h',
'public/browser/speech_recognition_session_config.cc',
'public/browser/speech_recognition_session_config.h',
'public/browser/speech_recognition_session_context.h',
'public/browser/speech_recognizer.h', 'public/browser/speech_recognizer.h',
'public/browser/trace_controller.h', 'public/browser/trace_controller.h',
'public/browser/trace_subscriber.h', 'public/browser/trace_subscriber.h',
......
...@@ -6,28 +6,70 @@ ...@@ -6,28 +6,70 @@
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_MANAGER_H_ #define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_MANAGER_H_
#include "base/string16.h" #include "base/string16.h"
#include "base/callback.h"
#include "content/common/content_export.h" #include "content/common/content_export.h"
#include "content/public/common/speech_recognition_result.h"
namespace content { namespace content {
// This is the gatekeeper for speech recognition in the browser process. It class SpeechRecognitionEventListener;
// handles requests received from various render views and makes sure only one struct SpeechRecognitionSessionConfig;
// of them can use speech recognition at a time. It also sends recognition struct SpeechRecognitionSessionContext;
// results and status events to the render views when required.
// The SpeechRecognitionManager (SRM) is a singleton class that handles SR
// functionalities within Chrome. Everyone that needs to perform SR should
// interface exclusively with the SRM, receiving events through the callback
// interface SpeechRecognitionEventListener.
// Since many different sources can use SR at different times (some overlapping
// is allowed while waiting for results), the SRM has the further responsibility
// of handling separately and reliably (taking into account also call sequences
// that might not make sense, e.g., two subsequent AbortSession calls).
// In this sense a session, within the SRM, models the ongoing evolution of a
// SR request from the viewpoint of the end-user, abstracting all the concrete
// operations that must be carried out, that will be handled by inner classes.
class SpeechRecognitionManager { class SpeechRecognitionManager {
public: public:
static const int kSessionIDInvalid;
// Returns the singleton instance. // Returns the singleton instance.
CONTENT_EXPORT static SpeechRecognitionManager* GetInstance(); static CONTENT_EXPORT SpeechRecognitionManager* GetInstance();
// Creates a new recognition session.
virtual int CreateSession(const SpeechRecognitionSessionConfig& config,
SpeechRecognitionEventListener* listener) = 0;
// Starts/restarts recognition for an existing session, after performing a
// preliminary check on the delegate (CheckRecognitionIsAllowed).
virtual void StartSession(int session_id) = 0;
// Aborts recognition for an existing session, without providing any result.
virtual void AbortSession(int session_id) = 0;
// Aborts all sessions for a given listener, without providing any result.
virtual void AbortAllSessionsForListener(
SpeechRecognitionEventListener* listener) = 0;
// Stops audio capture for an existing session. The audio captured before the
// call will be processed, possibly ending up with a result.
virtual void StopAudioCaptureForSession(int session_id) = 0;
// Starts/restarts recognition for an existing request. // Sends the session to background preventing it from further interacting with
virtual void StartRecognitionForRequest(int session_id) = 0; // the browser (typically invoked when the user clicks outside the speech UI).
// The session will be silently continued in background if possible (in case
// it already finished capturing audio and was just waiting for the result) or
// will be aborted if user interaction (e.g., audio recording) was involved
// when this function was called.
virtual void SendSessionToBackground(int session_id) = 0;
// Cancels recognition for an existing request. // Retrieves the context associated to a session.
virtual void CancelRecognitionForRequest(int session_id) = 0; virtual SpeechRecognitionSessionContext GetSessionContext(
int session_id) const = 0;
// Called when the user clicks outside the speech input UI causing it to close // Looks-up an existing session using a caller-provided matcher function.
// and possibly have speech input go to another element. virtual int LookupSessionByContext(
virtual void FocusLostForRequest(int session_id) = 0; base::Callback<bool(
const content::SpeechRecognitionSessionContext&)> matcher)
const = 0;
// Returns true if the OS reports existence of audio recording devices. // Returns true if the OS reports existence of audio recording devices.
virtual bool HasAudioInputDevices() = 0; virtual bool HasAudioInputDevices() = 0;
......
...@@ -8,12 +8,9 @@ ...@@ -8,12 +8,9 @@
#include <string> #include <string>
#include "base/callback_forward.h"
#include "content/public/common/speech_recognition_error.h" #include "content/public/common/speech_recognition_error.h"
namespace gfx {
class Rect;
}
namespace content { namespace content {
struct SpeechRecognitionResult; struct SpeechRecognitionResult;
...@@ -22,26 +19,20 @@ struct SpeechRecognitionResult; ...@@ -22,26 +19,20 @@ struct SpeechRecognitionResult;
// user's permission and for fetching optional request information. // user's permission and for fetching optional request information.
class SpeechRecognitionManagerDelegate { class SpeechRecognitionManagerDelegate {
public: public:
// Describes the microphone errors that are reported via ShowMicError.
enum MicError {
MIC_ERROR_NO_DEVICE_AVAILABLE = 0,
MIC_ERROR_DEVICE_IN_USE
};
virtual ~SpeechRecognitionManagerDelegate() {} virtual ~SpeechRecognitionManagerDelegate() {}
// Get the optional request information if available. // Get the optional diagnostic hardware information if available.
virtual void GetRequestInfo(bool* can_report_metrics, virtual void GetDiagnosticInformation(bool* can_report_metrics,
std::string* request_info) = 0; std::string* hardware_info) = 0;
// Called when recognition has been requested from point |element_rect_| on // Called when recognition has been requested. The source point of the view
// the view port for the given caller. The embedder should call the // port can be retrieved looking-up the session context.
// StartRecognition or CancelRecognition methods on SpeechInutManager in virtual void ShowRecognitionRequested(int session_id) = 0;
// response.
virtual void ShowRecognitionRequested(int session_id, // Checks (asynchronously) if current setup allows speech recognition.
int render_process_id, virtual void CheckRecognitionIsAllowed(
int render_view_id, int session_id,
const gfx::Rect& element_rect) = 0; base::Callback<void(int session_id, bool is_allowed)> callback) = 0;
// Called when recognition is starting up. // Called when recognition is starting up.
virtual void ShowWarmUp(int session_id) = 0; virtual void ShowWarmUp(int session_id) = 0;
...@@ -57,12 +48,9 @@ class SpeechRecognitionManagerDelegate { ...@@ -57,12 +48,9 @@ class SpeechRecognitionManagerDelegate {
float volume, float volume,
float noise_volume) = 0; float noise_volume) = 0;
// Called when no microphone has been found.
virtual void ShowMicError(int session_id, MicError error) = 0;
// Called when there has been a error with the recognition. // Called when there has been a error with the recognition.
virtual void ShowRecognizerError(int session_id, virtual void ShowError(int session_id,
SpeechRecognitionErrorCode error) = 0; const SpeechRecognitionError& error) = 0;
// Called when recognition has ended or has been canceled. // Called when recognition has ended or has been canceled.
virtual void DoClose(int session_id) = 0; virtual void DoClose(int session_id) = 0;
......
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/public/browser/speech_recognition_session_config.h"
#include "net/url_request/url_request_context_getter.h"
namespace content {
// Out-of-line definitions of the config's special members.
// |filter_profanities| defaults to false; every other member is
// default-constructed (empty strings, default context, NULL getter).
SpeechRecognitionSessionConfig::SpeechRecognitionSessionConfig()
    : filter_profanities(false) {}

SpeechRecognitionSessionConfig::~SpeechRecognitionSessionConfig() {}
} // namespace content
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONFIG_H_
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONFIG_H_
#include "base/basictypes.h"
#include "base/memory/ref_counted.h"
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_context.h"
namespace net {
class URLRequestContextGetter;
}
namespace content {
// Configuration params for creating a new speech recognition session.
struct CONTENT_EXPORT SpeechRecognitionSessionConfig {
  SpeechRecognitionSessionConfig();
  ~SpeechRecognitionSessionConfig();
  // Recognition language requested by the client. The format is not
  // validated here; presumably a tag like "en-US" — confirm against the
  // recognition engine.
  std::string language;
  // Optional grammar hint, forwarded as-is to the recognition engine.
  std::string grammar;
  // URL of the page that originated the recognition request.
  std::string origin_url;
  // Whether profanities should be filtered out of recognition results.
  // Defaults to false (set in the out-of-line constructor).
  bool filter_profanities;
  // Context attached to the session for its whole lifetime; clients can
  // retrieve it later via SpeechRecognitionManager::GetSessionContext.
  SpeechRecognitionSessionContext initial_context;
  // Request context used for the network fetches performed on behalf of
  // this session.
  scoped_refptr<net::URLRequestContextGetter> url_request_context_getter;
};
} // namespace content
#endif // CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONFIG_H_
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONTEXT_H_
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONTEXT_H_
#include "content/common/content_export.h"
#include "ui/gfx/rect.h"
namespace content {
// The context information required by clients of the SpeechRecognitionManager
// (InputTagSpeechDispatcherHost) and its delegates for mapping a recognition
// session to the other browser elements involved with it (e.g., the page
// element that requested the recognition). The SpeechRecognitionManager is
// not aware of the content of this struct and does NOT use it for its own
// purposes. However, the manager keeps this struct "attached" to the
// recognition session during the whole session lifetime, making its contents
// available to clients (in this regard, see the
// SpeechRecognitionManager::GetSessionContext and
// SpeechRecognitionManager::LookupSessionByContext methods).
struct CONTENT_EXPORT SpeechRecognitionSessionContext {
  SpeechRecognitionSessionContext()
      : render_process_id(0),
        render_view_id(0),
        render_request_id(0) {}
  ~SpeechRecognitionSessionContext() {}
  // ID of the renderer process that issued the recognition request.
  int render_process_id;
  // ID of the render view hosting the requesting page element.
  int render_view_id;
  // Per-renderer request identifier — presumably assigned by the
  // dispatcher host; confirm against InputTagSpeechDispatcherHost.
  int render_request_id;
  // Viewport bounds of the page element that requested recognition (used by
  // the delegate when showing the recognition UI).
  gfx::Rect element_rect;
};
} // namespace content
#endif // CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONTEXT_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment