Commit b450e909 authored by primiano@chromium.org

Speech refactoring: Reimplemented SpeechRecognitionManagerImpl as an FSM. (CL1.7)


BUG=116954
TEST=none.


Review URL: http://codereview.chromium.org/9972011

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@133967 0039d316-1c4b-4281-b951-d872f2087c98
parent 2e526f05
@@ -16,8 +16,11 @@
#include "chrome/browser/tab_contents/tab_util.h"
#include "chrome/common/pref_names.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/render_view_host.h"
#include "content/public/browser/render_view_host_delegate.h"
#include "content/public/browser/resource_context.h"
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "grit/generated_resources.h"
@@ -29,6 +32,7 @@
using content::BrowserThread;
using content::SpeechRecognitionManager;
using content::SpeechRecognitionSessionContext;
namespace speech {
@@ -105,17 +109,18 @@ ChromeSpeechRecognitionManagerDelegate::
}
void ChromeSpeechRecognitionManagerDelegate::ShowRecognitionRequested(
int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect) {
bubble_controller_->CreateBubble(session_id, render_process_id,
render_view_id, element_rect);
int session_id) {
const SpeechRecognitionSessionContext& context =
SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
bubble_controller_->CreateBubble(session_id,
context.render_process_id,
context.render_view_id,
context.element_rect);
}
void ChromeSpeechRecognitionManagerDelegate::GetRequestInfo(
void ChromeSpeechRecognitionManagerDelegate::GetDiagnosticInformation(
bool* can_report_metrics,
std::string* request_info) {
std::string* hardware_info) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
if (!optional_request_info_.get()) {
optional_request_info_ = new OptionalRequestInfo();
@@ -129,7 +134,24 @@ void ChromeSpeechRecognitionManagerDelegate::GetRequestInfo(
optional_request_info_->Refresh();
}
*can_report_metrics = optional_request_info_->can_report_metrics();
*request_info = optional_request_info_->value();
*hardware_info = optional_request_info_->value();
}
void ChromeSpeechRecognitionManagerDelegate::CheckRecognitionIsAllowed(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
const SpeechRecognitionSessionContext& context =
SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
// The check must be performed on the UI thread, so we defer it by posting to
// CheckRenderViewType, which will issue the callback on our behalf.
BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
base::Bind(&CheckRenderViewType,
session_id,
callback,
context.render_process_id,
context.render_view_id));
}
void ChromeSpeechRecognitionManagerDelegate::ShowWarmUp(int session_id) {
@@ -149,51 +171,38 @@ void ChromeSpeechRecognitionManagerDelegate::ShowInputVolume(
bubble_controller_->SetBubbleInputVolume(session_id, volume, noise_volume);
}
void ChromeSpeechRecognitionManagerDelegate::ShowMicError(int session_id,
MicError error) {
switch (error) {
case MIC_ERROR_NO_DEVICE_AVAILABLE:
bubble_controller_->SetBubbleMessage(
session_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
void ChromeSpeechRecognitionManagerDelegate::ShowError(
int session_id, const content::SpeechRecognitionError& error) {
int error_message_id = 0;
switch (error.code) {
case content::SPEECH_RECOGNITION_ERROR_AUDIO:
switch (error.details) {
case content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC:
error_message_id = IDS_SPEECH_INPUT_NO_MIC;
break;
case content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE:
error_message_id = IDS_SPEECH_INPUT_MIC_IN_USE;
break;
default:
error_message_id = IDS_SPEECH_INPUT_MIC_ERROR;
break;
}
break;
case MIC_ERROR_DEVICE_IN_USE:
bubble_controller_->SetBubbleMessage(
session_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_MIC_IN_USE));
case content::SPEECH_RECOGNITION_ERROR_NO_SPEECH:
error_message_id = IDS_SPEECH_INPUT_NO_SPEECH;
break;
case content::SPEECH_RECOGNITION_ERROR_NO_MATCH:
error_message_id = IDS_SPEECH_INPUT_NO_RESULTS;
break;
case content::SPEECH_RECOGNITION_ERROR_NETWORK:
error_message_id = IDS_SPEECH_INPUT_NET_ERROR;
break;
default:
NOTREACHED();
}
}
void ChromeSpeechRecognitionManagerDelegate::ShowRecognizerError(
int session_id, content::SpeechRecognitionErrorCode error) {
struct ErrorMessageMapEntry {
content::SpeechRecognitionErrorCode error;
int message_id;
};
ErrorMessageMapEntry error_message_map[] = {
{
content::SPEECH_RECOGNITION_ERROR_AUDIO, IDS_SPEECH_INPUT_MIC_ERROR
}, {
content::SPEECH_RECOGNITION_ERROR_NO_SPEECH, IDS_SPEECH_INPUT_NO_SPEECH
}, {
content::SPEECH_RECOGNITION_ERROR_NO_MATCH, IDS_SPEECH_INPUT_NO_RESULTS
}, {
content::SPEECH_RECOGNITION_ERROR_NETWORK, IDS_SPEECH_INPUT_NET_ERROR
}
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(error_message_map); ++i) {
if (error_message_map[i].error == error) {
bubble_controller_->SetBubbleMessage(
session_id,
l10n_util::GetStringUTF16(error_message_map[i].message_id));
NOTREACHED() << "unknown error " << error.code;
return;
}
}
NOTREACHED() << "unknown error " << error;
bubble_controller_->SetBubbleMessage(
session_id, l10n_util::GetStringUTF16(error_message_id));
}
void ChromeSpeechRecognitionManagerDelegate::DoClose(int session_id) {
@@ -205,18 +214,40 @@ void ChromeSpeechRecognitionManagerDelegate::InfoBubbleButtonClicked(
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
if (button == SpeechRecognitionBubble::BUTTON_CANCEL) {
SpeechRecognitionManager::GetInstance()->CancelRecognitionForRequest(
session_id);
SpeechRecognitionManager::GetInstance()->AbortSession(session_id);
} else if (button == SpeechRecognitionBubble::BUTTON_TRY_AGAIN) {
SpeechRecognitionManager::GetInstance()->StartRecognitionForRequest(
session_id);
SpeechRecognitionManager::GetInstance()->StartSession(session_id);
}
}
void ChromeSpeechRecognitionManagerDelegate::InfoBubbleFocusChanged(
int session_id) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
SpeechRecognitionManager::GetInstance()->FocusLostForRequest(session_id);
SpeechRecognitionManager::GetInstance()->SendSessionToBackground(session_id);
}
void ChromeSpeechRecognitionManagerDelegate::CheckRenderViewType(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback,
int render_process_id,
int render_view_id) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
const content::RenderViewHost* render_view_host =
content::RenderViewHost::FromID(render_process_id, render_view_id);
// For host delegates other than VIEW_TYPE_WEB_CONTENTS we can't reliably show
// a popup, including the speech input bubble. In these cases, for privacy
// reasons, we don't want to start recording if the user can't be properly
// notified. An example of this is trying to show the speech input bubble
// within an extension popup: http://crbug.com/92083. In these situations the
// speech input extension API should be used instead.
const bool allowed = (render_view_host != NULL &&
render_view_host->GetDelegate() != NULL &&
render_view_host->GetDelegate()->GetRenderViewType() ==
content::VIEW_TYPE_WEB_CONTENTS);
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
base::Bind(callback, session_id, allowed));
}
} // namespace speech
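
For reference, a minimal sketch of how the manager side might consume the asynchronous permission check above. The RecognitionAllowedCallback and DispatchEvent names match the private methods declared in speech_recognition_manager_impl.h later in this CL, but the bodies shown here are assumptions, not the CL's actual implementation:

// Sketch (assumed): starting a session only after the delegate's asynchronous
// permission check completes; the delegate issues the callback on IO.
void SpeechRecognitionManagerImpl::StartSession(int session_id) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  if (!SessionExists(session_id))
    return;
  delegate_->CheckRecognitionIsAllowed(
      session_id,
      base::Bind(&SpeechRecognitionManagerImpl::RecognitionAllowedCallback,
                 base::Unretained(this)));
}

void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id,
                                                              bool is_allowed) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  // Push the outcome into the session-handling FSM as a start or abort event.
  DispatchEvent(session_id,
                FSMEventArgs(is_allowed ? EVENT_START : EVENT_ABORT));
}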
@@ -12,8 +12,8 @@
namespace speech {
// This is Chrome's implementation of the SpeechRecognitionManager interface.
// This class is a singleton and accessed via the Get method.
// This is Chrome's implementation of the SpeechRecognitionManagerDelegate
// interface.
class ChromeSpeechRecognitionManagerDelegate
: NON_EXPORTED_BASE(public content::SpeechRecognitionManagerDelegate),
public SpeechRecognitionBubbleControllerDelegate {
@@ -28,27 +28,33 @@ class ChromeSpeechRecognitionManagerDelegate
protected:
// SpeechRecognitionManagerDelegate methods.
virtual void GetRequestInfo(bool* can_report_metrics,
std::string* request_info) OVERRIDE;
virtual void ShowRecognitionRequested(int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect) OVERRIDE;
virtual void GetDiagnosticInformation(bool* can_report_metrics,
std::string* hardware_info) OVERRIDE;
virtual void CheckRecognitionIsAllowed(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback) OVERRIDE;
virtual void ShowRecognitionRequested(int session_id) OVERRIDE;
virtual void ShowWarmUp(int session_id) OVERRIDE;
virtual void ShowRecognizing(int session_id) OVERRIDE;
virtual void ShowRecording(int session_id) OVERRIDE;
virtual void ShowInputVolume(int session_id,
float volume,
float noise_volume) OVERRIDE;
virtual void ShowMicError(int session_id,
MicError error) OVERRIDE;
virtual void ShowRecognizerError(
int session_id, content::SpeechRecognitionErrorCode error) OVERRIDE;
virtual void ShowError(int session_id,
const content::SpeechRecognitionError& error) OVERRIDE;
virtual void DoClose(int session_id) OVERRIDE;
private:
class OptionalRequestInfo;
// Checks for VIEW_TYPE_WEB_CONTENTS host in the UI thread and notifies back
// the result in the IO thread through |callback|.
static void CheckRenderViewType(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback,
int render_process_id,
int render_view_id);
scoped_refptr<SpeechRecognitionBubbleController> bubble_controller_;
scoped_refptr<OptionalRequestInfo> optional_request_info_;
......
@@ -8,6 +8,7 @@
#include "base/memory/scoped_ptr.h"
#include "content/common/content_export.h"
#include "content/public/browser/browser_message_filter.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "net/url_request/url_request_context_getter.h"
struct InputTagSpeechHostMsg_StartRecognition_Params;
@@ -17,32 +18,37 @@ class SpeechRecognitionPreferences;
struct SpeechRecognitionResult;
}
namespace media {
class AudioManager;
}
namespace speech {
class SpeechRecognitionManagerImpl;
// InputTagSpeechDispatcherHost is a delegate for Speech API messages used by
// RenderMessageFilter.
// It's the complement of InputTagSpeechDispatcher (owned by RenderView).
// RenderMessageFilter. Basically it acts as a proxy, relaying the events coming
// from the SpeechRecognitionManager to IPC messages (and vice versa).
// It's the complement of SpeechRecognitionDispatcher (owned by RenderView).
class CONTENT_EXPORT InputTagSpeechDispatcherHost
: public content::BrowserMessageFilter {
: public content::BrowserMessageFilter,
public content::SpeechRecognitionEventListener {
public:
class Sessions;
InputTagSpeechDispatcherHost(
int render_process_id,
net::URLRequestContextGetter* context_getter,
net::URLRequestContextGetter* url_request_context_getter,
content::SpeechRecognitionPreferences* recognition_preferences);
// Methods called by SpeechRecognitionManagerImpl.
void SetRecognitionResult(int session_id,
const content::SpeechRecognitionResult& result);
void DidCompleteRecording(int session_id);
void DidCompleteRecognition(int session_id);
// SpeechRecognitionEventListener methods.
virtual void OnRecognitionStart(int session_id) OVERRIDE;
virtual void OnAudioStart(int session_id) OVERRIDE;
virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;
virtual void OnSoundStart(int session_id) OVERRIDE;
virtual void OnSoundEnd(int session_id) OVERRIDE;
virtual void OnAudioEnd(int session_id) OVERRIDE;
virtual void OnRecognitionEnd(int session_id) OVERRIDE;
virtual void OnRecognitionResult(
int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
virtual void OnRecognitionError(
int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
virtual void OnAudioLevelsChange(
int session_id, float volume, float noise_volume) OVERRIDE;
// content::BrowserMessageFilter implementation.
virtual bool OnMessageReceived(const IPC::Message& message,
@@ -66,7 +72,7 @@ class CONTENT_EXPORT InputTagSpeechDispatcherHost
int render_process_id_;
bool may_have_pending_requests_; // Set if we received any speech IPC request
scoped_refptr<net::URLRequestContextGetter> context_getter_;
scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
scoped_refptr<content::SpeechRecognitionPreferences> recognition_preferences_;
static SpeechRecognitionManagerImpl* manager_;
......
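
A sketch of the proxying described in the class comment above: one SpeechRecognitionEventListener event relayed to the renderer as an IPC message. The InputTagSpeechMsg_SetRecognitionResult message name and the context lookup are assumptions for illustration, not part of this diff:

// Sketch (assumed): relaying a manager event back to the renderer via IPC.
void InputTagSpeechDispatcherHost::OnRecognitionResult(
    int session_id, const content::SpeechRecognitionResult& result) {
  // The session context tells us which render view and request originated
  // the recognition, so the reply can be routed to the right <input> element.
  content::SpeechRecognitionSessionContext context =
      manager_->GetSessionContext(session_id);
  // Hypothetical browser->renderer message carrying the result.
  Send(new InputTagSpeechMsg_SetRecognitionResult(context.render_view_id,
                                                  context.render_request_id,
                                                  result));
}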
@@ -5,6 +5,7 @@
#include "base/bind.h"
#include "base/command_line.h"
#include "base/file_path.h"
#include "base/memory/scoped_ptr.h"
#include "base/string_number_conversions.h"
#include "base/synchronization/waitable_event.h"
#include "base/utf_string_conversions.h"
@@ -16,11 +17,15 @@
#include "content/browser/speech/speech_recognition_manager_impl.h"
#include "content/browser/web_contents/web_contents_impl.h"
#include "content/public/browser/notification_types.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/content_switches.h"
#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebInputEvent.h"
using content::SpeechRecognitionEventListener;
using content::SpeechRecognitionSessionContext;
using content::NavigationController;
using content::WebContents;
@@ -36,7 +41,7 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
public:
FakeSpeechRecognitionManager()
: session_id_(0),
delegate_(NULL),
listener_(NULL),
did_cancel_all_(false),
should_send_fake_response_(true),
recognition_started_event_(false, false) {
@@ -63,23 +68,24 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
}
// SpeechRecognitionManager methods.
virtual void StartRecognition(
InputTagSpeechDispatcherHost* delegate,
int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect,
const std::string& language,
const std::string& grammar,
const std::string& origin_url,
net::URLRequestContextGetter* context_getter,
content::SpeechRecognitionPreferences* recognition_prefs) OVERRIDE {
VLOG(1) << "StartRecognition invoked.";
virtual int CreateSession(
const content::SpeechRecognitionSessionConfig& config,
SpeechRecognitionEventListener* event_listener) OVERRIDE {
VLOG(1) << "FAKE CreateSession invoked.";
EXPECT_EQ(0, session_id_);
EXPECT_EQ(NULL, delegate_);
session_id_ = session_id;
delegate_ = delegate;
grammar_ = grammar;
EXPECT_EQ(NULL, listener_);
listener_ = event_listener;
grammar_ = config.grammar;
session_ctx_ = config.initial_context;
session_id_ = 1;
return session_id_;
}
virtual void StartSession(int session_id) OVERRIDE {
VLOG(1) << "FAKE StartSession invoked.";
EXPECT_EQ(session_id, session_id_);
EXPECT_TRUE(listener_ != NULL);
if (should_send_fake_response_) {
// Give the fake result in a short while.
MessageLoop::current()->PostTask(FROM_HERE, base::Bind(
@@ -93,45 +99,69 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
}
recognition_started_event_.Signal();
}
virtual void CancelRecognition(int session_id) OVERRIDE {
VLOG(1) << "CancelRecognition invoked.";
virtual void AbortSession(int session_id) OVERRIDE {
VLOG(1) << "FAKE AbortSession invoked.";
EXPECT_EQ(session_id_, session_id);
session_id_ = 0;
delegate_ = NULL;
listener_ = NULL;
}
virtual void StopRecording(int session_id) OVERRIDE {
virtual void StopAudioCaptureForSession(int session_id) OVERRIDE {
VLOG(1) << "StopRecording invoked.";
EXPECT_EQ(session_id_, session_id);
// Nothing to do here since we aren't really recording.
}
virtual void CancelAllRequestsWithDelegate(
InputTagSpeechDispatcherHost* delegate) OVERRIDE {
virtual void AbortAllSessionsForListener(
content::SpeechRecognitionEventListener* listener) OVERRIDE {
VLOG(1) << "CancelAllRequestsWithDelegate invoked.";
// delegate_ is set to NULL if a fake result was received (see below), so
// check that delegate_ matches the incoming parameter only when there is
// listener_ is set to NULL if a fake result was received (see below), so
// check that listener_ matches the incoming parameter only when there is
// no fake result sent.
EXPECT_TRUE(should_send_fake_response_ || delegate_ == delegate);
EXPECT_TRUE(should_send_fake_response_ || listener_ == listener);
did_cancel_all_ = true;
}
virtual void SendSessionToBackground(int session_id) OVERRIDE {}
virtual bool HasAudioInputDevices() OVERRIDE { return true; }
virtual bool IsCapturingAudio() OVERRIDE { return true; }
virtual string16 GetAudioInputDeviceModel() OVERRIDE { return string16(); }
virtual void ShowAudioInputSettings() OVERRIDE {}
virtual int LookupSessionByContext(
base::Callback<bool(
const content::SpeechRecognitionSessionContext&)> matcher)
const OVERRIDE {
bool matched = matcher.Run(session_ctx_);
return matched ? session_id_ : 0;
}
virtual content::SpeechRecognitionSessionContext GetSessionContext(
int session_id) const OVERRIDE {
EXPECT_EQ(session_id, session_id_);
return session_ctx_;
}
private:
void SetFakeRecognitionResult() {
if (session_id_) { // Do a check in case we were cancelled.
VLOG(1) << "Setting fake recognition result.";
delegate_->DidCompleteRecording(session_id_);
listener_->OnAudioEnd(session_id_);
content::SpeechRecognitionResult results;
results.hypotheses.push_back(content::SpeechRecognitionHypothesis(
ASCIIToUTF16(kTestResult), 1.0));
delegate_->SetRecognitionResult(session_id_, results);
delegate_->DidCompleteRecognition(session_id_);
listener_->OnRecognitionResult(session_id_, results);
listener_->OnRecognitionEnd(session_id_);
session_id_ = 0;
delegate_ = NULL;
listener_ = NULL;
VLOG(1) << "Finished setting fake recognition result.";
}
}
int session_id_;
InputTagSpeechDispatcherHost* delegate_;
SpeechRecognitionEventListener* listener_;
SpeechRecognitionSessionContext session_ctx_;
std::string grammar_;
bool did_cancel_all_;
bool should_send_fake_response_;
......
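
A hedged sketch of how a browser test might drive this fake; |fake_manager| and |test_listener| are hypothetical names, and the canned hypothesis comes from kTestResult above:

// Sketch (assumed test flow): CreateSession records the listener, and
// StartSession posts the fake result back through it.
content::SpeechRecognitionSessionConfig config;
config.grammar = "builtin:search";  // Any grammar; the fake just stores it.
int session_id = fake_manager->CreateSession(config, &test_listener);
fake_manager->StartSession(session_id);
// Spin the message loop: the fake posts OnAudioEnd, OnRecognitionResult
// (carrying kTestResult) and OnRecognitionEnd to |test_listener|.
MessageLoop::current()->RunAllPending();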
@@ -9,69 +9,66 @@
#include <string>
#include "base/basictypes.h"
#include "base/callback.h"
#include "base/compiler_specific.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/singleton.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h"
#include "ui/gfx/rect.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/speech_recognition_error.h"
namespace content {
class ResourceContext;
class SpeechRecognitionManagerDelegate;
class SpeechRecognitionPreferences;
struct SpeechRecognitionResult;
class SpeechRecognizer;
}
namespace net {
class URLRequestContextGetter;
}
namespace speech {
class InputTagSpeechDispatcherHost;
class CONTENT_EXPORT SpeechRecognitionManagerImpl
: NON_EXPORTED_BASE(public content::SpeechRecognitionManager),
NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) {
class SpeechRecognizerImpl;
// This is the manager for speech recognition. It is a singleton instance in
// the browser process and can serve several requests. Each recognition request
// corresponds to a session, initiated via |CreateSession|.
// At any given moment the manager has at most one "interactive" session
// (identified by |interactive_session_id_|), that is, the session currently
// holding the user's attention. For privacy reasons, only the interactive
// session is allowed to capture audio from the microphone. However, after
// audio capture is completed, a session can be sent to the background and can
// live in parallel with other sessions while waiting for its results.
//
// In more detail, SpeechRecognitionManager has the following responsibilities:
// - Handles requests received from various render views and makes sure only
// one of them accesses the audio device at any given time.
// - Relays recognition results/status/error events of each session to the
// corresponding listener (demultiplexing based on their session_id).
// - Handles the instantiation of SpeechRecognitionEngine objects when
// requested by SpeechRecognitionSessions.
class CONTENT_EXPORT SpeechRecognitionManagerImpl :
public NON_EXPORTED_BASE(content::SpeechRecognitionManager),
public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) {
public:
static SpeechRecognitionManagerImpl* GetInstance();
// SpeechRecognitionManager implementation:
virtual void StartRecognitionForRequest(int session_id) OVERRIDE;
virtual void CancelRecognitionForRequest(int session_id) OVERRIDE;
virtual void FocusLostForRequest(int session_id) OVERRIDE;
// SpeechRecognitionManager implementation.
virtual int CreateSession(
const content::SpeechRecognitionSessionConfig& config,
SpeechRecognitionEventListener* event_listener) OVERRIDE;
virtual void StartSession(int session_id) OVERRIDE;
virtual void AbortSession(int session_id) OVERRIDE;
virtual void AbortAllSessionsForListener(
content::SpeechRecognitionEventListener* listener) OVERRIDE;
virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
virtual void SendSessionToBackground(int session_id) OVERRIDE;
virtual content::SpeechRecognitionSessionContext GetSessionContext(
int session_id) const OVERRIDE;
virtual int LookupSessionByContext(
base::Callback<bool(
const content::SpeechRecognitionSessionContext&)> matcher)
const OVERRIDE;
virtual bool HasAudioInputDevices() OVERRIDE;
virtual bool IsCapturingAudio() OVERRIDE;
virtual string16 GetAudioInputDeviceModel() OVERRIDE;
virtual void ShowAudioInputSettings() OVERRIDE;
// Handlers for requests from render views.
// |delegate| is a weak pointer and should remain valid until
// its |DidCompleteRecognition| method is called or recognition is cancelled.
// |render_process_id| is the ID of the renderer process initiating the
// request.
// |element_rect| is the display bounds of the html element requesting speech
// input (in page coordinates).
virtual void StartRecognition(
InputTagSpeechDispatcherHost* delegate,
int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect,
const std::string& language,
const std::string& grammar,
const std::string& origin_url,
net::URLRequestContextGetter* context_getter,
content::SpeechRecognitionPreferences* speech_recognition_prefs);
virtual void CancelRecognition(int session_id);
virtual void CancelAllRequestsWithDelegate(
InputTagSpeechDispatcherHost* delegate);
virtual void StopRecording(int session_id);
// SpeechRecognitionEventListener methods.
virtual void OnRecognitionStart(int session_id) OVERRIDE;
virtual void OnAudioStart(int session_id) OVERRIDE;
@@ -84,8 +81,8 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
virtual void OnRecognitionError(
int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
virtual void OnAudioLevelsChange(
int session_id, float volume, float noise_volume) OVERRIDE;
virtual void OnAudioLevelsChange(int session_id, float volume,
float noise_volume) OVERRIDE;
protected:
// Private constructor to enforce singleton.
@@ -93,34 +90,85 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
SpeechRecognitionManagerImpl();
virtual ~SpeechRecognitionManagerImpl();
bool HasPendingRequest(int session_id) const;
private:
struct Request {
Request();
~Request();
// Data types for the internal Finite State Machine (FSM).
enum FSMState {
STATE_IDLE = 0,
STATE_INTERACTIVE,
STATE_BACKGROUND,
STATE_WAITING_FOR_DELETION,
STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION
};
InputTagSpeechDispatcherHost* delegate;
scoped_refptr<content::SpeechRecognizer> recognizer;
bool is_active; // Set to true when recording or recognition is going on.
enum FSMEvent {
EVENT_ABORT = 0,
EVENT_START,
EVENT_STOP_CAPTURE,
EVENT_SET_BACKGROUND,
EVENT_RECOGNITION_ENDED,
EVENT_RECOGNITION_RESULT,
EVENT_RECOGNITION_ERROR,
EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR
};
struct SpeechRecognitionParams;
struct Session {
Session();
~Session();
InputTagSpeechDispatcherHost* GetDelegate(int session_id) const;
int id;
content::SpeechRecognitionEventListener* event_listener;
content::SpeechRecognitionSessionContext context;
scoped_refptr<SpeechRecognizerImpl> recognizer;
FSMState state;
bool error_occurred;
};
void CheckRenderViewTypeAndStartRecognition(
const SpeechRecognitionParams& params);
void ProceedStartingRecognition(const SpeechRecognitionParams& params);
struct FSMEventArgs {
explicit FSMEventArgs(FSMEvent event_value);
~FSMEventArgs();
void CancelRecognitionAndInformDelegate(int session_id);
FSMEvent event;
content::SpeechRecognitionError speech_error;
};
typedef std::map<int, Request> SpeechRecognizerMap;
SpeechRecognizerMap requests_;
std::string request_info_;
bool can_report_metrics_;
int recording_session_id_;
scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_;
// Callback issued by the SpeechRecognitionManagerDelegate for reporting
// asynchronously the result of the CheckRecognitionIsAllowed call.
void RecognitionAllowedCallback(int session_id, bool is_allowed);
// Entry point for pushing any external event into the session handling FSM.
void DispatchEvent(int session_id, FSMEventArgs args);
// Defines the behavior of the session handling FSM, selecting the appropriate
// transition according to the session, its current state and the event.
FSMState ExecuteTransitionAndGetNextState(Session& session,
const FSMEventArgs& event_args);
// The methods below handle transitions of the session handling FSM.
FSMState SessionStart(Session& session, const FSMEventArgs& event_args);
FSMState SessionAbort(Session& session, const FSMEventArgs& event_args);
FSMState SessionStopAudioCapture(Session& session,
const FSMEventArgs& event_args);
FSMState SessionAbortIfCapturingAudioOrBackground(
Session& session, const FSMEventArgs& event_args);
FSMState SessionSetBackground(Session& session,
const FSMEventArgs& event_args);
FSMState SessionReportError(Session& session, const FSMEventArgs& event_args);
FSMState SessionReportNoMatch(Session& session,
const FSMEventArgs& event_args);
FSMState SessionDelete(Session& session, const FSMEventArgs& event_args);
FSMState DoNothing(Session& session, const FSMEventArgs& event_args);
FSMState NotFeasible(Session& session, const FSMEventArgs& event_args);
bool SessionExists(int session_id) const;
content::SpeechRecognitionEventListener* GetListener(int session_id) const;
int GetNextSessionID();
typedef std::map<int, Session> SessionsTable;
SessionsTable sessions_;
int interactive_session_id_;
int last_session_id_;
bool is_dispatching_event_;
content::SpeechRecognitionManagerDelegate* delegate_;
};
} // namespace speech
......
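
For reference, a minimal sketch of the FSM dispatch outlined by this header. The transition table below is illustrative, an assumption consistent with the states, events and handler names declared above, not the CL's actual .cc implementation:

// Sketch (assumed): selecting a transition from (state, event), applying it,
// and storing the resulting state back into the session.
void SpeechRecognitionManagerImpl::DispatchEvent(int session_id,
                                                 FSMEventArgs args) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  DCHECK(!is_dispatching_event_);  // Transitions must not re-enter the FSM.
  is_dispatching_event_ = true;
  Session& session = sessions_[session_id];
  session.state = ExecuteTransitionAndGetNextState(session, args);
  is_dispatching_event_ = false;
}

SpeechRecognitionManagerImpl::FSMState
SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState(
    Session& session, const FSMEventArgs& event_args) {
  switch (session.state) {
    case STATE_IDLE:
      switch (event_args.event) {
        case EVENT_START:
          return SessionStart(session, event_args);
        case EVENT_ABORT:
          return SessionAbort(session, event_args);
        default:
          return DoNothing(session, event_args);
      }
    case STATE_INTERACTIVE:
      switch (event_args.event) {
        case EVENT_STOP_CAPTURE:
          return SessionStopAudioCapture(session, event_args);
        case EVENT_SET_BACKGROUND:
          return SessionSetBackground(session, event_args);
        case EVENT_RECOGNITION_ERROR:
          return SessionReportError(session, event_args);
        case EVENT_ABORT:
          return SessionAbort(session, event_args);
        default:
          return DoNothing(session, event_args);
      }
    case STATE_BACKGROUND:
    case STATE_WAITING_FOR_DELETION:
      switch (event_args.event) {
        case EVENT_RECOGNITION_ENDED:
          return SessionDelete(session, event_args);
        default:
          return DoNothing(session, event_args);
      }
    default:
      return NotFeasible(session, event_args);
  }
}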
@@ -133,6 +133,9 @@
'public/browser/speech_recognition_manager.h',
'public/browser/speech_recognition_manager_delegate.h',
'public/browser/speech_recognition_preferences.h',
'public/browser/speech_recognition_session_config.cc',
'public/browser/speech_recognition_session_config.h',
'public/browser/speech_recognition_session_context.h',
'public/browser/speech_recognizer.h',
'public/browser/trace_controller.h',
'public/browser/trace_subscriber.h',
......
@@ -6,28 +6,70 @@
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_MANAGER_H_
#include "base/string16.h"
#include "base/callback.h"
#include "content/common/content_export.h"
#include "content/public/common/speech_recognition_result.h"
namespace content {
// This is the gatekeeper for speech recognition in the browser process. It
// handles requests received from various render views and makes sure only one
// of them can use speech recognition at a time. It also sends recognition
// results and status events to the render views when required.
class SpeechRecognitionEventListener;
struct SpeechRecognitionSessionConfig;
struct SpeechRecognitionSessionContext;
// The SpeechRecognitionManager (SRM) is a singleton class that handles speech
// recognition (SR) functionality within Chrome. Everything that needs to
// perform SR should interface exclusively with the SRM, receiving events
// through the SpeechRecognitionEventListener callback interface.
// Since many different sources can use SR at different times (some overlap is
// allowed while waiting for results), the SRM has the further responsibility
// of handling each session separately and reliably (also accounting for call
// sequences that might not make sense, e.g., two subsequent AbortSession
// calls). In this sense a session, within the SRM, models the ongoing
// evolution of an SR request from the end-user's viewpoint, abstracting the
// concrete operations that must be carried out, which are handled by inner
// classes.
class SpeechRecognitionManager {
public:
static const int kSessionIDInvalid;
// Returns the singleton instance.
CONTENT_EXPORT static SpeechRecognitionManager* GetInstance();
static CONTENT_EXPORT SpeechRecognitionManager* GetInstance();
// Creates a new recognition session.
virtual int CreateSession(const SpeechRecognitionSessionConfig& config,
SpeechRecognitionEventListener* listener) = 0;
// Starts/restarts recognition for an existing session, after performing a
// preliminary check on the delegate (CheckRecognitionIsAllowed).
virtual void StartSession(int session_id) = 0;
// Aborts recognition for an existing session, without providing any result.
virtual void AbortSession(int session_id) = 0;
// Aborts all sessions for a given listener, without providing any result.
virtual void AbortAllSessionsForListener(
SpeechRecognitionEventListener* listener) = 0;
// Stops audio capture for an existing session. The audio captured before the
// call will be processed, possibly ending up with a result.
virtual void StopAudioCaptureForSession(int session_id) = 0;
// Starts/restarts recognition for an existing request.
virtual void StartRecognitionForRequest(int session_id) = 0;
// Sends the session to the background, preventing it from further interacting
// with the browser (typically invoked when the user clicks outside the speech
// UI). The session will silently continue in the background if possible (in
// case it already finished capturing audio and was just waiting for the
// result) or
// will be aborted if user interaction (e.g., audio recording) was involved
// when this function was called.
virtual void SendSessionToBackground(int session_id) = 0;
// Cancels recognition for an existing request.
virtual void CancelRecognitionForRequest(int session_id) = 0;
// Retrieves the context associated with a session.
virtual SpeechRecognitionSessionContext GetSessionContext(
int session_id) const = 0;
// Called when the user clicks outside the speech input UI causing it to close
// and possibly have speech input go to another element.
virtual void FocusLostForRequest(int session_id) = 0;
// Looks up an existing session using a caller-provided matcher function.
virtual int LookupSessionByContext(
base::Callback<bool(
const content::SpeechRecognitionSessionContext&)> matcher)
const = 0;
// Returns true if the OS reports existence of audio recording devices.
virtual bool HasAudioInputDevices() = 0;
......
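
A short usage sketch of this interface from a client's viewpoint; |listener| is any SpeechRecognitionEventListener implementation, and treating a non-kSessionIDInvalid return as success is an assumption about CreateSession's failure convention:

// Sketch (assumed): creating and starting a session through the public API.
void StartSpeechRecognition(
    content::SpeechRecognitionEventListener* listener,
    net::URLRequestContextGetter* request_context) {
  content::SpeechRecognitionSessionConfig config;
  config.language = "en-US";
  config.filter_profanities = true;
  config.url_request_context_getter = request_context;
  content::SpeechRecognitionManager* manager =
      content::SpeechRecognitionManager::GetInstance();
  const int session_id = manager->CreateSession(config, listener);
  if (session_id != content::SpeechRecognitionManager::kSessionIDInvalid)
    manager->StartSession(session_id);
  // Later: StopAudioCaptureForSession() to finish capture and wait for the
  // result, or AbortSession() to cancel without a result.
}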
@@ -8,12 +8,9 @@
#include <string>
#include "base/callback_forward.h"
#include "content/public/common/speech_recognition_error.h"
namespace gfx {
class Rect;
}
namespace content {
struct SpeechRecognitionResult;
@@ -22,26 +19,20 @@ struct SpeechRecognitionResult;
// user's permission and for fetching optional request information.
class SpeechRecognitionManagerDelegate {
public:
// Describes the microphone errors that are reported via ShowMicError.
enum MicError {
MIC_ERROR_NO_DEVICE_AVAILABLE = 0,
MIC_ERROR_DEVICE_IN_USE
};
virtual ~SpeechRecognitionManagerDelegate() {}
// Get the optional request information if available.
virtual void GetRequestInfo(bool* can_report_metrics,
std::string* request_info) = 0;
// Get the optional diagnostic hardware information if available.
virtual void GetDiagnosticInformation(bool* can_report_metrics,
std::string* hardware_info) = 0;
// Called when recognition has been requested from point |element_rect_| on
// the view port for the given caller. The embedder should call the
// StartRecognition or CancelRecognition methods on SpeechInputManager in
// response.
virtual void ShowRecognitionRequested(int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect) = 0;
// Called when recognition has been requested. The source point in the view
// port can be retrieved by looking up the session context.
virtual void ShowRecognitionRequested(int session_id) = 0;
// Checks (asynchronously) if current setup allows speech recognition.
virtual void CheckRecognitionIsAllowed(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback) = 0;
// Called when recognition is starting up.
virtual void ShowWarmUp(int session_id) = 0;
@@ -57,12 +48,9 @@ class SpeechRecognitionManagerDelegate {
float volume,
float noise_volume) = 0;
// Called when no microphone has been found.
virtual void ShowMicError(int session_id, MicError error) = 0;
// Called when there has been an error with the recognition.
virtual void ShowRecognizerError(int session_id,
SpeechRecognitionErrorCode error) = 0;
virtual void ShowError(int session_id,
const SpeechRecognitionError& error) = 0;
// Called when recognition has ended or has been canceled.
virtual void DoClose(int session_id) = 0;
......
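
A minimal sketch of the asynchronous permission contract from an embedder's side: a trivial delegate that always allows recognition (an assumption for illustration; ChromeSpeechRecognitionManagerDelegate above shows the real UI-thread check, and the remaining pure-virtual UI hooks are elided here):

// Sketch (assumed): the simplest delegate fulfilling the async permission
// contract, granting every request directly on the calling (IO) thread.
class AlwaysAllowDelegate : public content::SpeechRecognitionManagerDelegate {
 public:
  virtual void CheckRecognitionIsAllowed(
      int session_id,
      base::Callback<void(int session_id, bool is_allowed)> callback)
      OVERRIDE {
    callback.Run(session_id, true);
  }
  // ... ShowWarmUp, ShowError, DoClose, etc. elided ...
};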
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/public/browser/speech_recognition_session_config.h"
#include "net/url_request/url_request_context_getter.h"
namespace content {
SpeechRecognitionSessionConfig::SpeechRecognitionSessionConfig()
: filter_profanities(false) {
}
SpeechRecognitionSessionConfig::~SpeechRecognitionSessionConfig() {
}
} // namespace content
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONFIG_H_
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONFIG_H_
#include "base/basictypes.h"
#include "base/memory/ref_counted.h"
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_context.h"
namespace net {
class URLRequestContextGetter;
}
namespace content {
// Configuration params for creating a new speech recognition session.
struct CONTENT_EXPORT SpeechRecognitionSessionConfig {
SpeechRecognitionSessionConfig();
~SpeechRecognitionSessionConfig();
std::string language;
std::string grammar;
std::string origin_url;
bool filter_profanities;
SpeechRecognitionSessionContext initial_context;
scoped_refptr<net::URLRequestContextGetter> url_request_context_getter;
};
} // namespace content
#endif // CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONFIG_H_
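
A sketch of how a dispatcher host might populate this config when handling a renderer request. The |params| fields mirror what an InputTagSpeechHostMsg_StartRecognition_Params plausibly carries and are assumptions, as is sourcing filter_profanities from preferences:

// Sketch (assumed): filling the session config from an IPC start request.
content::SpeechRecognitionSessionConfig config;
config.language = params.language;    // e.g. "en-US".
config.grammar = params.grammar;
config.origin_url = params.origin_url;
config.filter_profanities = recognition_preferences_->filter_profanities();
config.url_request_context_getter = url_request_context_getter_;
// The context routes results back to the originating element and anchors
// the bubble UI to its on-page position.
config.initial_context.render_process_id = render_process_id_;
config.initial_context.render_view_id = params.render_view_id;
config.initial_context.render_request_id = params.request_id;
config.initial_context.element_rect = params.element_rect;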
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONTEXT_H_
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONTEXT_H_
#include "content/common/content_export.h"
#include "ui/gfx/rect.h"
namespace content {
// The context information required by clients of the SpeechRecognitionManager
// (InputTagSpeechDispatcherHost) and its delegates for mapping the recognition
// session to other browser elements involved with it (e.g., the page element
// that requested the recognition). The SpeechRecognitionManager is not aware
// of the contents of this struct and does NOT use them for its own purposes.
// However, the manager keeps this struct "attached" to the recognition session
// throughout the session's lifetime, making its contents available to clients
// (in this regard, see the SpeechRecognitionManager::GetSessionContext and
// SpeechRecognitionManager::LookupSessionByContext methods).
struct CONTENT_EXPORT SpeechRecognitionSessionContext {
SpeechRecognitionSessionContext()
: render_process_id(0),
render_view_id(0),
render_request_id(0) {}
~SpeechRecognitionSessionContext() {}
int render_process_id;
int render_view_id;
int render_request_id;
gfx::Rect element_rect;
};
} // namespace content
#endif // CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONTEXT_H_
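
A sketch of the matcher-based lookup mentioned above, written in the pre-C++11 base::Bind style of this codebase; MatchesRenderView is a hypothetical helper:

// Sketch (assumed): finding the session attached to a given render view.
static bool MatchesRenderView(
    int render_process_id,
    int render_view_id,
    const content::SpeechRecognitionSessionContext& context) {
  return context.render_process_id == render_process_id &&
         context.render_view_id == render_view_id;
}

int LookupByRenderView(content::SpeechRecognitionManager* manager,
                       int render_process_id,
                       int render_view_id) {
  // Binding the first two arguments yields the
  // base::Callback<bool(const SpeechRecognitionSessionContext&)> matcher.
  return manager->LookupSessionByContext(
      base::Bind(&MatchesRenderView, render_process_id, render_view_id));
}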