Commit b450e909 authored by primiano@chromium.org

Speech refactoring: Reimplemented SpeechRecognitionManagerImpl as an FSM. (CL1.7)


BUG=116954
TEST=none.


Review URL: http://codereview.chromium.org/9972011

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@133967 0039d316-1c4b-4281-b951-d872f2087c98
parent 2e526f05
@@ -16,8 +16,11 @@
#include "chrome/browser/tab_contents/tab_util.h"
#include "chrome/common/pref_names.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/render_view_host.h"
#include "content/public/browser/render_view_host_delegate.h"
#include "content/public/browser/resource_context.h"
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "grit/generated_resources.h"
@@ -29,6 +32,7 @@
using content::BrowserThread;
using content::SpeechRecognitionManager;
using content::SpeechRecognitionSessionContext;
namespace speech {
@@ -105,17 +109,18 @@ ChromeSpeechRecognitionManagerDelegate::
}
void ChromeSpeechRecognitionManagerDelegate::ShowRecognitionRequested(
int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect) {
bubble_controller_->CreateBubble(session_id, render_process_id,
render_view_id, element_rect);
int session_id) {
const SpeechRecognitionSessionContext& context =
SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
bubble_controller_->CreateBubble(session_id,
context.render_process_id,
context.render_view_id,
context.element_rect);
}
void ChromeSpeechRecognitionManagerDelegate::GetRequestInfo(
void ChromeSpeechRecognitionManagerDelegate::GetDiagnosticInformation(
bool* can_report_metrics,
std::string* request_info) {
std::string* hardware_info) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
if (!optional_request_info_.get()) {
optional_request_info_ = new OptionalRequestInfo();
@@ -129,7 +134,24 @@ void ChromeSpeechRecognitionManagerDelegate::GetRequestInfo(
optional_request_info_->Refresh();
}
*can_report_metrics = optional_request_info_->can_report_metrics();
*request_info = optional_request_info_->value();
*hardware_info = optional_request_info_->value();
}
void ChromeSpeechRecognitionManagerDelegate::CheckRecognitionIsAllowed(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
const SpeechRecognitionSessionContext& context =
SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
// The check must be performed on the UI thread, so we defer it by posting to
// CheckRenderViewType, which will issue the callback on our behalf.
BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
base::Bind(&CheckRenderViewType,
session_id,
callback,
context.render_process_id,
context.render_view_id));
}
void ChromeSpeechRecognitionManagerDelegate::ShowWarmUp(int session_id) {
@@ -149,51 +171,38 @@ void ChromeSpeechRecognitionManagerDelegate::ShowInputVolume(
bubble_controller_->SetBubbleInputVolume(session_id, volume, noise_volume);
}
void ChromeSpeechRecognitionManagerDelegate::ShowMicError(int session_id,
MicError error) {
switch (error) {
case MIC_ERROR_NO_DEVICE_AVAILABLE:
bubble_controller_->SetBubbleMessage(
session_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
void ChromeSpeechRecognitionManagerDelegate::ShowError(
int session_id, const content::SpeechRecognitionError& error) {
int error_message_id = 0;
switch (error.code) {
case content::SPEECH_RECOGNITION_ERROR_AUDIO:
switch (error.details) {
case content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC:
error_message_id = IDS_SPEECH_INPUT_NO_MIC;
break;
case content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE:
error_message_id = IDS_SPEECH_INPUT_MIC_IN_USE;
break;
default:
error_message_id = IDS_SPEECH_INPUT_MIC_ERROR;
break;
}
break;
case MIC_ERROR_DEVICE_IN_USE:
bubble_controller_->SetBubbleMessage(
session_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_MIC_IN_USE));
case content::SPEECH_RECOGNITION_ERROR_NO_SPEECH:
error_message_id = IDS_SPEECH_INPUT_NO_SPEECH;
break;
case content::SPEECH_RECOGNITION_ERROR_NO_MATCH:
error_message_id = IDS_SPEECH_INPUT_NO_RESULTS;
break;
case content::SPEECH_RECOGNITION_ERROR_NETWORK:
error_message_id = IDS_SPEECH_INPUT_NET_ERROR;
break;
default:
NOTREACHED();
}
}
void ChromeSpeechRecognitionManagerDelegate::ShowRecognizerError(
int session_id, content::SpeechRecognitionErrorCode error) {
struct ErrorMessageMapEntry {
content::SpeechRecognitionErrorCode error;
int message_id;
};
ErrorMessageMapEntry error_message_map[] = {
{
content::SPEECH_RECOGNITION_ERROR_AUDIO, IDS_SPEECH_INPUT_MIC_ERROR
}, {
content::SPEECH_RECOGNITION_ERROR_NO_SPEECH, IDS_SPEECH_INPUT_NO_SPEECH
}, {
content::SPEECH_RECOGNITION_ERROR_NO_MATCH, IDS_SPEECH_INPUT_NO_RESULTS
}, {
content::SPEECH_RECOGNITION_ERROR_NETWORK, IDS_SPEECH_INPUT_NET_ERROR
}
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(error_message_map); ++i) {
if (error_message_map[i].error == error) {
bubble_controller_->SetBubbleMessage(
session_id,
l10n_util::GetStringUTF16(error_message_map[i].message_id));
NOTREACHED() << "unknown error " << error.code;
return;
}
}
NOTREACHED() << "unknown error " << error;
bubble_controller_->SetBubbleMessage(
session_id, l10n_util::GetStringUTF16(error_message_id));
}
void ChromeSpeechRecognitionManagerDelegate::DoClose(int session_id) {
@@ -205,18 +214,40 @@ void ChromeSpeechRecognitionManagerDelegate::InfoBubbleButtonClicked(
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
if (button == SpeechRecognitionBubble::BUTTON_CANCEL) {
SpeechRecognitionManager::GetInstance()->CancelRecognitionForRequest(
session_id);
SpeechRecognitionManager::GetInstance()->AbortSession(session_id);
} else if (button == SpeechRecognitionBubble::BUTTON_TRY_AGAIN) {
SpeechRecognitionManager::GetInstance()->StartRecognitionForRequest(
session_id);
SpeechRecognitionManager::GetInstance()->StartSession(session_id);
}
}
void ChromeSpeechRecognitionManagerDelegate::InfoBubbleFocusChanged(
int session_id) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
SpeechRecognitionManager::GetInstance()->FocusLostForRequest(session_id);
SpeechRecognitionManager::GetInstance()->SendSessionToBackground(session_id);
}
void ChromeSpeechRecognitionManagerDelegate::CheckRenderViewType(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback,
int render_process_id,
int render_view_id) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
const content::RenderViewHost* render_view_host =
content::RenderViewHost::FromID(render_process_id, render_view_id);
// For host delegates other than VIEW_TYPE_WEB_CONTENTS we can't reliably show
// a popup, including the speech input bubble. In these cases, for privacy
// reasons, we don't want to start recording if the user can't be properly
// notified. An example of this is trying to show the speech input bubble
// within an extension popup: http://crbug.com/92083. In these situations the
// speech input extension API should be used instead.
const bool allowed = (render_view_host != NULL &&
render_view_host->GetDelegate() != NULL &&
render_view_host->GetDelegate()->GetRenderViewType() ==
content::VIEW_TYPE_WEB_CONTENTS);
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
base::Bind(callback, session_id, allowed));
}
} // namespace speech
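
For reference, a minimal sketch of how the manager side might consume the asynchronous permission check above. The RecognitionAllowedCallback and DispatchEvent names match the private methods declared in speech_recognition_manager_impl.h later in this CL, but the bodies shown here are assumptions, not the CL's actual implementation:

// Sketch (assumed): starting a session only after the delegate's asynchronous
// permission check completes; the delegate issues the callback on IO.
void SpeechRecognitionManagerImpl::StartSession(int session_id) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  if (!SessionExists(session_id))
    return;
  delegate_->CheckRecognitionIsAllowed(
      session_id,
      base::Bind(&SpeechRecognitionManagerImpl::RecognitionAllowedCallback,
                 base::Unretained(this)));
}

void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id,
                                                              bool is_allowed) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  // Push the outcome into the session-handling FSM as a start or abort event.
  DispatchEvent(session_id,
                FSMEventArgs(is_allowed ? EVENT_START : EVENT_ABORT));
}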
@@ -12,8 +12,8 @@
namespace speech {
// This is Chrome's implementation of the SpeechRecognitionManager interface.
// This class is a singleton and accessed via the Get method.
// This is Chrome's implementation of the SpeechRecognitionManagerDelegate
// interface.
class ChromeSpeechRecognitionManagerDelegate
: NON_EXPORTED_BASE(public content::SpeechRecognitionManagerDelegate),
public SpeechRecognitionBubbleControllerDelegate {
@@ -28,27 +28,33 @@ class ChromeSpeechRecognitionManagerDelegate
protected:
// SpeechRecognitionManagerDelegate methods.
virtual void GetRequestInfo(bool* can_report_metrics,
std::string* request_info) OVERRIDE;
virtual void ShowRecognitionRequested(int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect) OVERRIDE;
virtual void GetDiagnosticInformation(bool* can_report_metrics,
std::string* hardware_info) OVERRIDE;
virtual void CheckRecognitionIsAllowed(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback) OVERRIDE;
virtual void ShowRecognitionRequested(int session_id) OVERRIDE;
virtual void ShowWarmUp(int session_id) OVERRIDE;
virtual void ShowRecognizing(int session_id) OVERRIDE;
virtual void ShowRecording(int session_id) OVERRIDE;
virtual void ShowInputVolume(int session_id,
float volume,
float noise_volume) OVERRIDE;
virtual void ShowMicError(int session_id,
MicError error) OVERRIDE;
virtual void ShowRecognizerError(
int session_id, content::SpeechRecognitionErrorCode error) OVERRIDE;
virtual void ShowError(int session_id,
const content::SpeechRecognitionError& error) OVERRIDE;
virtual void DoClose(int session_id) OVERRIDE;
private:
class OptionalRequestInfo;
// Checks for VIEW_TYPE_WEB_CONTENTS host in the UI thread and notifies back
// the result in the IO thread through |callback|.
static void CheckRenderViewType(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback,
int render_process_id,
int render_view_id);
scoped_refptr<SpeechRecognitionBubbleController> bubble_controller_;
scoped_refptr<OptionalRequestInfo> optional_request_info_;
......
@@ -8,6 +8,7 @@
#include "base/memory/scoped_ptr.h"
#include "content/common/content_export.h"
#include "content/public/browser/browser_message_filter.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "net/url_request/url_request_context_getter.h"
struct InputTagSpeechHostMsg_StartRecognition_Params;
@@ -17,32 +18,37 @@ class SpeechRecognitionPreferences;
struct SpeechRecognitionResult;
}
namespace media {
class AudioManager;
}
namespace speech {
class SpeechRecognitionManagerImpl;
// InputTagSpeechDispatcherHost is a delegate for Speech API messages used by
// RenderMessageFilter.
// It's the complement of InputTagSpeechDispatcher (owned by RenderView).
// RenderMessageFilter. Basically it acts as a proxy, relaying the events coming
// from the SpeechRecognitionManager to IPC messages (and vice versa).
// It's the complement of SpeechRecognitionDispatcher (owned by RenderView).
class CONTENT_EXPORT InputTagSpeechDispatcherHost
: public content::BrowserMessageFilter {
: public content::BrowserMessageFilter,
public content::SpeechRecognitionEventListener {
public:
class Sessions;
InputTagSpeechDispatcherHost(
int render_process_id,
net::URLRequestContextGetter* context_getter,
net::URLRequestContextGetter* url_request_context_getter,
content::SpeechRecognitionPreferences* recognition_preferences);
// Methods called by SpeechRecognitionManagerImpl.
void SetRecognitionResult(int session_id,
const content::SpeechRecognitionResult& result);
void DidCompleteRecording(int session_id);
void DidCompleteRecognition(int session_id);
// SpeechRecognitionEventListener methods.
virtual void OnRecognitionStart(int session_id) OVERRIDE;
virtual void OnAudioStart(int session_id) OVERRIDE;
virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;
virtual void OnSoundStart(int session_id) OVERRIDE;
virtual void OnSoundEnd(int session_id) OVERRIDE;
virtual void OnAudioEnd(int session_id) OVERRIDE;
virtual void OnRecognitionEnd(int session_id) OVERRIDE;
virtual void OnRecognitionResult(
int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
virtual void OnRecognitionError(
int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
virtual void OnAudioLevelsChange(
int session_id, float volume, float noise_volume) OVERRIDE;
// content::BrowserMessageFilter implementation.
virtual bool OnMessageReceived(const IPC::Message& message,
@@ -66,7 +72,7 @@ class CONTENT_EXPORT InputTagSpeechDispatcherHost
int render_process_id_;
bool may_have_pending_requests_; // Set if we received any speech IPC request
scoped_refptr<net::URLRequestContextGetter> context_getter_;
scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
scoped_refptr<content::SpeechRecognitionPreferences> recognition_preferences_;
static SpeechRecognitionManagerImpl* manager_;
......
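
A sketch of the proxying described in the class comment above: one SpeechRecognitionEventListener event relayed to the renderer as an IPC message. The InputTagSpeechMsg_SetRecognitionResult message name and the context lookup are assumptions for illustration, not part of this diff:

// Sketch (assumed): relaying a manager event back to the renderer via IPC.
void InputTagSpeechDispatcherHost::OnRecognitionResult(
    int session_id, const content::SpeechRecognitionResult& result) {
  // The session context tells us which render view and request originated
  // the recognition, so the reply can be routed to the right <input> element.
  content::SpeechRecognitionSessionContext context =
      manager_->GetSessionContext(session_id);
  // Hypothetical browser->renderer message carrying the result.
  Send(new InputTagSpeechMsg_SetRecognitionResult(context.render_view_id,
                                                  context.render_request_id,
                                                  result));
}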
@@ -5,6 +5,7 @@
#include "base/bind.h"
#include "base/command_line.h"
#include "base/file_path.h"
#include "base/memory/scoped_ptr.h"
#include "base/string_number_conversions.h"
#include "base/synchronization/waitable_event.h"
#include "base/utf_string_conversions.h"
@@ -16,11 +17,15 @@
#include "content/browser/speech/speech_recognition_manager_impl.h"
#include "content/browser/web_contents/web_contents_impl.h"
#include "content/public/browser/notification_types.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/content_switches.h"
#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebInputEvent.h"
using content::SpeechRecognitionEventListener;
using content::SpeechRecognitionSessionContext;
using content::NavigationController;
using content::WebContents;
@@ -36,7 +41,7 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
public:
FakeSpeechRecognitionManager()
: session_id_(0),
delegate_(NULL),
listener_(NULL),
did_cancel_all_(false),
should_send_fake_response_(true),
recognition_started_event_(false, false) {
@@ -63,23 +68,24 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
}
// SpeechRecognitionManager methods.
virtual void StartRecognition(
InputTagSpeechDispatcherHost* delegate,
int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect,
const std::string& language,
const std::string& grammar,
const std::string& origin_url,
net::URLRequestContextGetter* context_getter,
content::SpeechRecognitionPreferences* recognition_prefs) OVERRIDE {
VLOG(1) << "StartRecognition invoked.";
virtual int CreateSession(
const content::SpeechRecognitionSessionConfig& config,
SpeechRecognitionEventListener* event_listener) OVERRIDE {
VLOG(1) << "FAKE CreateSession invoked.";
EXPECT_EQ(0, session_id_);
EXPECT_EQ(NULL, delegate_);
session_id_ = session_id;
delegate_ = delegate;
grammar_ = grammar;
EXPECT_EQ(NULL, listener_);
listener_ = event_listener;
grammar_ = config.grammar;
session_ctx_ = config.initial_context;
session_id_ = 1;
return session_id_;
}
virtual void StartSession(int session_id) OVERRIDE {
VLOG(1) << "FAKE StartSession invoked.";
EXPECT_EQ(session_id, session_id_);
EXPECT_TRUE(listener_ != NULL);
if (should_send_fake_response_) {
// Give the fake result in a short while.
MessageLoop::current()->PostTask(FROM_HERE, base::Bind(
@@ -93,45 +99,69 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManagerImpl {
}
recognition_started_event_.Signal();
}
virtual void CancelRecognition(int session_id) OVERRIDE {
VLOG(1) << "CancelRecognition invoked.";
virtual void AbortSession(int session_id) OVERRIDE {
VLOG(1) << "FAKE AbortSession invoked.";
EXPECT_EQ(session_id_, session_id);
session_id_ = 0;
delegate_ = NULL;
listener_ = NULL;
}
virtual void StopRecording(int session_id) OVERRIDE {
virtual void StopAudioCaptureForSession(int session_id) OVERRIDE {
VLOG(1) << "StopRecording invoked.";
EXPECT_EQ(session_id_, session_id);
// Nothing to do here since we aren't really recording.
}
virtual void CancelAllRequestsWithDelegate(
InputTagSpeechDispatcherHost* delegate) OVERRIDE {
virtual void AbortAllSessionsForListener(
content::SpeechRecognitionEventListener* listener) OVERRIDE {
VLOG(1) << "CancelAllRequestsWithDelegate invoked.";
// delegate_ is set to NULL if a fake result was received (see below), so
// check that delegate_ matches the incoming parameter only when there is
// listener_ is set to NULL if a fake result was received (see below), so
// check that listener_ matches the incoming parameter only when there is
// no fake result sent.
EXPECT_TRUE(should_send_fake_response_ || delegate_ == delegate);
EXPECT_TRUE(should_send_fake_response_ || listener_ == listener);
did_cancel_all_ = true;
}
virtual void SendSessionToBackground(int session_id) OVERRIDE {}
virtual bool HasAudioInputDevices() OVERRIDE { return true; }
virtual bool IsCapturingAudio() OVERRIDE { return true; }
virtual string16 GetAudioInputDeviceModel() OVERRIDE { return string16(); }
virtual void ShowAudioInputSettings() OVERRIDE {}
virtual int LookupSessionByContext(
base::Callback<bool(
const content::SpeechRecognitionSessionContext&)> matcher)
const OVERRIDE {
bool matched = matcher.Run(session_ctx_);
return matched ? session_id_ : 0;
}
virtual content::SpeechRecognitionSessionContext GetSessionContext(
int session_id) const OVERRIDE {
EXPECT_EQ(session_id, session_id_);
return session_ctx_;
}
private:
void SetFakeRecognitionResult() {
if (session_id_) { // Do a check in case we were cancelled.
VLOG(1) << "Setting fake recognition result.";
delegate_->DidCompleteRecording(session_id_);
listener_->OnAudioEnd(session_id_);
content::SpeechRecognitionResult results;
results.hypotheses.push_back(content::SpeechRecognitionHypothesis(
ASCIIToUTF16(kTestResult), 1.0));
delegate_->SetRecognitionResult(session_id_, results);
delegate_->DidCompleteRecognition(session_id_);
listener_->OnRecognitionResult(session_id_, results);
listener_->OnRecognitionEnd(session_id_);
session_id_ = 0;
delegate_ = NULL;
listener_ = NULL;
VLOG(1) << "Finished setting fake recognition result.";
}
}
int session_id_;
InputTagSpeechDispatcherHost* delegate_;
SpeechRecognitionEventListener* listener_;
SpeechRecognitionSessionContext session_ctx_;
std::string grammar_;
bool did_cancel_all_;
bool should_send_fake_response_;
......
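
A hedged sketch of how a browser test might drive this fake; |fake_manager| and |test_listener| are hypothetical names, and the canned hypothesis comes from kTestResult above:

// Sketch (assumed test flow): CreateSession records the listener, and
// StartSession posts the fake result back through it.
content::SpeechRecognitionSessionConfig config;
config.grammar = "builtin:search";  // Any grammar; the fake just stores it.
int session_id = fake_manager->CreateSession(config, &test_listener);
fake_manager->StartSession(session_id);
// Spin the message loop: the fake posts OnAudioEnd, OnRecognitionResult
// (carrying kTestResult) and OnRecognitionEnd to |test_listener|.
MessageLoop::current()->RunAllPending();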
@@ -9,69 +9,66 @@
#include <string>
#include "base/basictypes.h"
#include "base/callback.h"
#include "base/compiler_specific.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/singleton.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h"
#include "ui/gfx/rect.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/speech_recognition_error.h"
namespace content {
class ResourceContext;
class SpeechRecognitionManagerDelegate;
class SpeechRecognitionPreferences;
struct SpeechRecognitionResult;
class SpeechRecognizer;
}
namespace net {
class URLRequestContextGetter;
}
namespace speech {
class InputTagSpeechDispatcherHost;
class CONTENT_EXPORT SpeechRecognitionManagerImpl
: NON_EXPORTED_BASE(public content::SpeechRecognitionManager),
NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) {
class SpeechRecognizerImpl;
// This is the manager for speech recognition. It is a singleton instance in
// the browser process and can serve several requests. Each recognition request
// corresponds to a session, initiated via |CreateSession|.
// At any given moment the manager has at most one "interactive" session
// (identified by |interactive_session_id_|), that is, the session currently
// holding the user's attention. For privacy reasons, only the interactive
// session is allowed to capture audio from the microphone. However, after
// audio capture is completed, a session can be sent to the background and can
// live in parallel with other sessions while waiting for its results.
//
// In more detail, SpeechRecognitionManager has the following responsibilities:
// - Handles requests received from various render views and makes sure only
// one of them accesses the audio device at any given time.
// - Relays recognition results/status/error events of each session to the
// corresponding listener (demultiplexing based on their session_id).
// - Handles the instantiation of SpeechRecognitionEngine objects when
// requested by SpeechRecognitionSessions.
class CONTENT_EXPORT SpeechRecognitionManagerImpl :
public NON_EXPORTED_BASE(content::SpeechRecognitionManager),
public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) {
public:
static SpeechRecognitionManagerImpl* GetInstance();
// SpeechRecognitionManager implementation:
virtual void StartRecognitionForRequest(int session_id) OVERRIDE;
virtual void CancelRecognitionForRequest(int session_id) OVERRIDE;
virtual void FocusLostForRequest(int session_id) OVERRIDE;
// SpeechRecognitionManager implementation.
virtual int CreateSession(
const content::SpeechRecognitionSessionConfig& config,
SpeechRecognitionEventListener* event_listener) OVERRIDE;
virtual void StartSession(int session_id) OVERRIDE;
virtual void AbortSession(int session_id) OVERRIDE;
virtual void AbortAllSessionsForListener(
content::SpeechRecognitionEventListener* listener) OVERRIDE;
virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
virtual void SendSessionToBackground(int session_id) OVERRIDE;
virtual content::SpeechRecognitionSessionContext GetSessionContext(
int session_id) const OVERRIDE;
virtual int LookupSessionByContext(
base::Callback<bool(
const content::SpeechRecognitionSessionContext&)> matcher)
const OVERRIDE;
virtual bool HasAudioInputDevices() OVERRIDE;
virtual bool IsCapturingAudio() OVERRIDE;
virtual string16 GetAudioInputDeviceModel() OVERRIDE;
virtual void ShowAudioInputSettings() OVERRIDE;
// Handlers for requests from render views.
// |delegate| is a weak pointer and should remain valid until
// its |DidCompleteRecognition| method is called or recognition is cancelled.
// |render_process_id| is the ID of the renderer process initiating the
// request.
// |element_rect| is the display bounds of the html element requesting speech
// input (in page coordinates).
virtual void StartRecognition(
InputTagSpeechDispatcherHost* delegate,
int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect,
const std::string& language,
const std::string& grammar,
const std::string& origin_url,
net::URLRequestContextGetter* context_getter,
content::SpeechRecognitionPreferences* speech_recognition_prefs);
virtual void CancelRecognition(int session_id);
virtual void CancelAllRequestsWithDelegate(
InputTagSpeechDispatcherHost* delegate);
virtual void StopRecording(int session_id);
// SpeechRecognitionEventListener methods.
virtual void OnRecognitionStart(int session_id) OVERRIDE;
virtual void OnAudioStart(int session_id) OVERRIDE;
@@ -84,8 +81,8 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;
virtual void OnRecognitionError(
int session_id, const content::SpeechRecognitionError& error) OVERRIDE;
virtual void OnAudioLevelsChange(
int session_id, float volume, float noise_volume) OVERRIDE;
virtual void OnAudioLevelsChange(int session_id, float volume,
float noise_volume) OVERRIDE;
protected:
// Private constructor to enforce singleton.
@@ -93,34 +90,85 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
SpeechRecognitionManagerImpl();
virtual ~SpeechRecognitionManagerImpl();
bool HasPendingRequest(int session_id) const;
private:
struct Request {
Request();
~Request();
// Data types for the internal Finite State Machine (FSM).
enum FSMState {
STATE_IDLE = 0,
STATE_INTERACTIVE,
STATE_BACKGROUND,
STATE_WAITING_FOR_DELETION,
STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION
};
InputTagSpeechDispatcherHost* delegate;
scoped_refptr<content::SpeechRecognizer> recognizer;
bool is_active; // Set to true when recording or recognition is going on.
enum FSMEvent {
EVENT_ABORT = 0,
EVENT_START,
EVENT_STOP_CAPTURE,
EVENT_SET_BACKGROUND,
EVENT_RECOGNITION_ENDED,
EVENT_RECOGNITION_RESULT,
EVENT_RECOGNITION_ERROR,
EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR
};
struct SpeechRecognitionParams;
struct Session {
Session();
~Session();
InputTagSpeechDispatcherHost* GetDelegate(int session_id) const;
int id;
content::SpeechRecognitionEventListener* event_listener;
content::SpeechRecognitionSessionContext context;
scoped_refptr<SpeechRecognizerImpl> recognizer;
FSMState state;
bool error_occurred;
};
void CheckRenderViewTypeAndStartRecognition(
const SpeechRecognitionParams& params);
void ProceedStartingRecognition(const SpeechRecognitionParams& params);
struct FSMEventArgs {
explicit FSMEventArgs(FSMEvent event_value);
~FSMEventArgs();
void CancelRecognitionAndInformDelegate(int session_id);
FSMEvent event;
content::SpeechRecognitionError speech_error;
};
typedef std::map<int, Request> SpeechRecognizerMap;
SpeechRecognizerMap requests_;
std::string request_info_;
bool can_report_metrics_;
int recording_session_id_;
scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_;
// Callback issued by the SpeechRecognitionManagerDelegate for reporting
// asynchronously the result of the CheckRecognitionIsAllowed call.
void RecognitionAllowedCallback(int session_id, bool is_allowed);
// Entry point for pushing any external event into the session handling FSM.
void DispatchEvent(int session_id, FSMEventArgs args);
// Defines the behavior of the session handling FSM, selecting the appropriate
// transition according to the session, its current state and the event.
FSMState ExecuteTransitionAndGetNextState(Session& session,
const FSMEventArgs& event_args);
// The methods below handle transitions of the session handling FSM.
FSMState SessionStart(Session& session, const FSMEventArgs& event_args);
FSMState SessionAbort(Session& session, const FSMEventArgs& event_args);
FSMState SessionStopAudioCapture(Session& session,
const FSMEventArgs& event_args);
FSMState SessionAbortIfCapturingAudioOrBackground(
Session& session, const FSMEventArgs& event_args);
FSMState SessionSetBackground(Session& session,
const FSMEventArgs& event_args);
FSMState SessionReportError(Session& session, const FSMEventArgs& event_args);
FSMState SessionReportNoMatch(Session& session,
const FSMEventArgs& event_args);
FSMState SessionDelete(Session& session, const FSMEventArgs& event_args);
FSMState DoNothing(Session& session, const FSMEventArgs& event_args);
FSMState NotFeasible(Session& session, const FSMEventArgs& event_args);
bool SessionExists(int session_id) const;
content::SpeechRecognitionEventListener* GetListener(int session_id) const;
int GetNextSessionID();
typedef std::map<int, Session> SessionsTable;
SessionsTable sessions_;
int interactive_session_id_;
int last_session_id_;
bool is_dispatching_event_;
content::SpeechRecognitionManagerDelegate* delegate_;
};
} // namespace speech
......
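
For reference, a minimal sketch of the FSM dispatch outlined by this header. The transition table below is illustrative, an assumption consistent with the states, events and handler names declared above, not the CL's actual .cc implementation:

// Sketch (assumed): selecting a transition from (state, event), applying it,
// and storing the resulting state back into the session.
void SpeechRecognitionManagerImpl::DispatchEvent(int session_id,
                                                 FSMEventArgs args) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  DCHECK(!is_dispatching_event_);  // Transitions must not re-enter the FSM.
  is_dispatching_event_ = true;
  Session& session = sessions_[session_id];
  session.state = ExecuteTransitionAndGetNextState(session, args);
  is_dispatching_event_ = false;
}

SpeechRecognitionManagerImpl::FSMState
SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState(
    Session& session, const FSMEventArgs& event_args) {
  switch (session.state) {
    case STATE_IDLE:
      switch (event_args.event) {
        case EVENT_START:
          return SessionStart(session, event_args);
        case EVENT_ABORT:
          return SessionAbort(session, event_args);
        default:
          return DoNothing(session, event_args);
      }
    case STATE_INTERACTIVE:
      switch (event_args.event) {
        case EVENT_STOP_CAPTURE:
          return SessionStopAudioCapture(session, event_args);
        case EVENT_SET_BACKGROUND:
          return SessionSetBackground(session, event_args);
        case EVENT_RECOGNITION_ERROR:
          return SessionReportError(session, event_args);
        case EVENT_ABORT:
          return SessionAbort(session, event_args);
        default:
          return DoNothing(session, event_args);
      }
    case STATE_BACKGROUND:
    case STATE_WAITING_FOR_DELETION:
      switch (event_args.event) {
        case EVENT_RECOGNITION_ENDED:
          return SessionDelete(session, event_args);
        default:
          return DoNothing(session, event_args);
      }
    default:
      return NotFeasible(session, event_args);
  }
}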
@@ -133,6 +133,9 @@
'public/browser/speech_recognition_manager.h',
'public/browser/speech_recognition_manager_delegate.h',
'public/browser/speech_recognition_preferences.h',
'public/browser/speech_recognition_session_config.cc',
'public/browser/speech_recognition_session_config.h',
'public/browser/speech_recognition_session_context.h',
'public/browser/speech_recognizer.h',
'public/browser/trace_controller.h',
'public/browser/trace_subscriber.h',
......
@@ -6,28 +6,70 @@
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_MANAGER_H_
#include "base/string16.h"
#include "base/callback.h"
#include "content/common/content_export.h"
#include "content/public/common/speech_recognition_result.h"
namespace content {
// This is the gatekeeper for speech recognition in the browser process. It
// handles requests received from various render views and makes sure only one
// of them can use speech recognition at a time. It also sends recognition
// results and status events to the render views when required.
class SpeechRecognitionEventListener;
struct SpeechRecognitionSessionConfig;
struct SpeechRecognitionSessionContext;
// The SpeechRecognitionManager (SRM) is a singleton class that handles speech
// recognition (SR) functionality within Chrome. Everything that needs to
// perform SR should interface exclusively with the SRM, receiving events
// through the SpeechRecognitionEventListener callback interface.
// Since many different sources can use SR at different times (some overlap is
// allowed while waiting for results), the SRM has the further responsibility
// of handling each session separately and reliably (also accounting for call
// sequences that might not make sense, e.g., two subsequent AbortSession
// calls). In this sense a session, within the SRM, models the ongoing
// evolution of an SR request from the end-user's viewpoint, abstracting the
// concrete operations that must be carried out, which are handled by inner
// classes.
class SpeechRecognitionManager {
public:
static const int kSessionIDInvalid;
// Returns the singleton instance.
CONTENT_EXPORT static SpeechRecognitionManager* GetInstance();
static CONTENT_EXPORT SpeechRecognitionManager* GetInstance();
// Creates a new recognition session.
virtual int CreateSession(const SpeechRecognitionSessionConfig& config,
SpeechRecognitionEventListener* listener) = 0;
// Starts/restarts recognition for an existing session, after performing a
// preliminary check on the delegate (CheckRecognitionIsAllowed).
virtual void StartSession(int session_id) = 0;
// Aborts recognition for an existing session, without providing any result.
virtual void AbortSession(int session_id) = 0;
// Aborts all sessions for a given listener, without providing any result.
virtual void AbortAllSessionsForListener(
SpeechRecognitionEventListener* listener) = 0;
// Stops audio capture for an existing session. The audio captured before the
// call will be processed, possibly ending up with a result.
virtual void StopAudioCaptureForSession(int session_id) = 0;
// Starts/restarts recognition for an existing request.
virtual void StartRecognitionForRequest(int session_id) = 0;
// Sends the session to the background, preventing it from further interacting
// with the browser (typically invoked when the user clicks outside the speech
// UI). The session will silently continue in the background if possible (in
// case it already finished capturing audio and was just waiting for the
// result) or
// will be aborted if user interaction (e.g., audio recording) was involved
// when this function was called.
virtual void SendSessionToBackground(int session_id) = 0;
// Cancels recognition for an existing request.
virtual void CancelRecognitionForRequest(int session_id) = 0;
// Retrieves the context associated with a session.
virtual SpeechRecognitionSessionContext GetSessionContext(
int session_id) const = 0;
// Called when the user clicks outside the speech input UI causing it to close
// and possibly have speech input go to another element.
virtual void FocusLostForRequest(int session_id) = 0;
// Looks up an existing session using a caller-provided matcher function.
virtual int LookupSessionByContext(
base::Callback<bool(
const content::SpeechRecognitionSessionContext&)> matcher)
const = 0;
// Returns true if the OS reports existence of audio recording devices.
virtual bool HasAudioInputDevices() = 0;
......
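
A short usage sketch of this interface from a client's viewpoint; |listener| is any SpeechRecognitionEventListener implementation, and treating a non-kSessionIDInvalid return as success is an assumption about CreateSession's failure convention:

// Sketch (assumed): creating and starting a session through the public API.
void StartSpeechRecognition(
    content::SpeechRecognitionEventListener* listener,
    net::URLRequestContextGetter* request_context) {
  content::SpeechRecognitionSessionConfig config;
  config.language = "en-US";
  config.filter_profanities = true;
  config.url_request_context_getter = request_context;
  content::SpeechRecognitionManager* manager =
      content::SpeechRecognitionManager::GetInstance();
  const int session_id = manager->CreateSession(config, listener);
  if (session_id != content::SpeechRecognitionManager::kSessionIDInvalid)
    manager->StartSession(session_id);
  // Later: StopAudioCaptureForSession() to finish capture and wait for the
  // result, or AbortSession() to cancel without a result.
}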
@@ -8,12 +8,9 @@
#include <string>
#include "base/callback_forward.h"
#include "content/public/common/speech_recognition_error.h"
namespace gfx {
class Rect;
}
namespace content {
struct SpeechRecognitionResult;
@@ -22,26 +19,20 @@ struct SpeechRecognitionResult;
// user's permission and for fetching optional request information.
class SpeechRecognitionManagerDelegate {
public:
// Describes the microphone errors that are reported via ShowMicError.
enum MicError {
MIC_ERROR_NO_DEVICE_AVAILABLE = 0,
MIC_ERROR_DEVICE_IN_USE
};
virtual ~SpeechRecognitionManagerDelegate() {}
// Get the optional request information if available.
virtual void GetRequestInfo(bool* can_report_metrics,
std::string* request_info) = 0;
// Get the optional diagnostic hardware information if available.
virtual void GetDiagnosticInformation(bool* can_report_metrics,
std::string* hardware_info) = 0;
// Called when recognition has been requested from point |element_rect_| on
// the view port for the given caller. The embedder should call the
// StartRecognition or CancelRecognition methods on SpeechInputManager in
// response.
virtual void ShowRecognitionRequested(int session_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect) = 0;
// Called when recognition has been requested. The source point in the view
// port can be retrieved by looking up the session context.
virtual void ShowRecognitionRequested(int session_id) = 0;
// Checks (asynchronously) if current setup allows speech recognition.
virtual void CheckRecognitionIsAllowed(
int session_id,
base::Callback<void(int session_id, bool is_allowed)> callback) = 0;
// Called when recognition is starting up.
virtual void ShowWarmUp(int session_id) = 0;
@@ -57,12 +48,9 @@ class SpeechRecognitionManagerDelegate {
float volume,
float noise_volume) = 0;
// Called when no microphone has been found.
virtual void ShowMicError(int session_id, MicError error) = 0;
// Called when there has been an error with the recognition.
virtual void ShowRecognizerError(int session_id,
SpeechRecognitionErrorCode error) = 0;
virtual void ShowError(int session_id,
const SpeechRecognitionError& error) = 0;
// Called when recognition has ended or has been canceled.
virtual void DoClose(int session_id) = 0;
......
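
A minimal sketch of the asynchronous permission contract from an embedder's side: a trivial delegate that always allows recognition (an assumption for illustration; ChromeSpeechRecognitionManagerDelegate above shows the real UI-thread check, and the remaining pure-virtual UI hooks are elided here):

// Sketch (assumed): the simplest delegate fulfilling the async permission
// contract, granting every request directly on the calling (IO) thread.
class AlwaysAllowDelegate : public content::SpeechRecognitionManagerDelegate {
 public:
  virtual void CheckRecognitionIsAllowed(
      int session_id,
      base::Callback<void(int session_id, bool is_allowed)> callback)
      OVERRIDE {
    callback.Run(session_id, true);
  }
  // ... ShowWarmUp, ShowError, DoClose, etc. elided ...
};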
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/public/browser/speech_recognition_session_config.h"
#include "net/url_request/url_request_context_getter.h"
namespace content {
SpeechRecognitionSessionConfig::SpeechRecognitionSessionConfig()
: filter_profanities(false) {
}
SpeechRecognitionSessionConfig::~SpeechRecognitionSessionConfig() {
}
} // namespace content
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONFIG_H_
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONFIG_H_
#include "base/basictypes.h"
#include "base/memory/ref_counted.h"
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_context.h"
namespace net {
class URLRequestContextGetter;
}
namespace content {
// Configuration params for creating a new speech recognition session.
struct CONTENT_EXPORT SpeechRecognitionSessionConfig {
SpeechRecognitionSessionConfig();
~SpeechRecognitionSessionConfig();
std::string language;
std::string grammar;
std::string origin_url;
bool filter_profanities;
SpeechRecognitionSessionContext initial_context;
scoped_refptr<net::URLRequestContextGetter> url_request_context_getter;
};
} // namespace content
#endif // CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONFIG_H_
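
A sketch of how a dispatcher host might populate this config when handling a renderer request. The |params| fields mirror what an InputTagSpeechHostMsg_StartRecognition_Params plausibly carries and are assumptions, as is sourcing filter_profanities from preferences:

// Sketch (assumed): filling the session config from an IPC start request.
content::SpeechRecognitionSessionConfig config;
config.language = params.language;    // e.g. "en-US".
config.grammar = params.grammar;
config.origin_url = params.origin_url;
config.filter_profanities = recognition_preferences_->filter_profanities();
config.url_request_context_getter = url_request_context_getter_;
// The context routes results back to the originating element and anchors
// the bubble UI to its on-page position.
config.initial_context.render_process_id = render_process_id_;
config.initial_context.render_view_id = params.render_view_id;
config.initial_context.render_request_id = params.request_id;
config.initial_context.element_rect = params.element_rect;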
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONTEXT_H_
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONTEXT_H_
#include "content/common/content_export.h"
#include "ui/gfx/rect.h"
namespace content {
// The context information required by clients of the SpeechRecognitionManager
// (InputTagSpeechDispatcherHost) and its delegates for mapping the recognition
// session to other browser elements involved with it (e.g., the page element
// that requested the recognition). The SpeechRecognitionManager is not aware
// of the contents of this struct and does NOT use them for its own purposes.
// However, the manager keeps this struct "attached" to the recognition session
// throughout the session's lifetime, making its contents available to clients
// (in this regard, see the SpeechRecognitionManager::GetSessionContext and
// SpeechRecognitionManager::LookupSessionByContext methods).
struct CONTENT_EXPORT SpeechRecognitionSessionContext {
SpeechRecognitionSessionContext()
: render_process_id(0),
render_view_id(0),
render_request_id(0) {}
~SpeechRecognitionSessionContext() {}
int render_process_id;
int render_view_id;
int render_request_id;
gfx::Rect element_rect;
};
} // namespace content
#endif // CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_SESSION_CONTEXT_H_
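
A sketch of the matcher-based lookup mentioned above, written in the pre-C++11 base::Bind style of this codebase; MatchesRenderView is a hypothetical helper:

// Sketch (assumed): finding the session attached to a given render view.
static bool MatchesRenderView(
    int render_process_id,
    int render_view_id,
    const content::SpeechRecognitionSessionContext& context) {
  return context.render_process_id == render_process_id &&
         context.render_view_id == render_view_id;
}

int LookupByRenderView(content::SpeechRecognitionManager* manager,
                       int render_process_id,
                       int render_view_id) {
  // Binding the first two arguments yields the
  // base::Callback<bool(const SpeechRecognitionSessionContext&)> matcher.
  return manager->LookupSessionByContext(
      base::Bind(&MatchesRenderView, render_process_id, render_view_id));
}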