Commit 3b13cccc authored by Xiaohui Chen, committed by Commit Bot

assistant: add initial DSP support

* Add a feature flag to control this, off by default
* Move audio_input_impl to its own file since it has grown large
* Introduce hotword_state_manager, which controls the DSP input device
  state. For a DSP device, it works as follows:
    - Open input device with HOTWORD effect
    - Input device won't forward audio data until OKG is detected
    - Once OKG is detected, the input device behaves like a regular input
      device and constantly streams audio data; note that the preamble
      of OKG itself is included in the initial audio data
    - When a conversation is finished, we need to destroy and recreate
      the audio input to arm the DSP again (a rough sketch follows).
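The diff for platform/audio_input_impl.cc is collapsed further down this page, so what follows is only a hedged sketch of the DSP lifecycle described above, written against the HotwordStateManager interface and RecreateAudioInputStream() declared in the new platform/audio_input_impl.h in this CL. The class name DspHotwordStateManager, its constructor, and the input_ member are illustrative assumptions, not code from the CL.

#include "chromeos/services/assistant/platform/audio_input_impl.h"

namespace chromeos {
namespace assistant {

// Sketch only: a DSP-flavored state manager. The real implementation lives in
// the collapsed audio_input_impl.cc and may differ.
class DspHotwordStateManager : public AudioInputImpl::HotwordStateManager {
 public:
  explicit DspHotwordStateManager(AudioInputImpl* input) : input_(input) {}

  // OKG detected: the DSP device now streams audio constantly (the OKG
  // preamble is included in the initial data), so the stream is simply left
  // running for the duration of the turn.
  void OnConversationTurnStarted() override {}

  // Turn finished: destroy and re-create the input stream with the HOTWORD
  // effect so the DSP is armed again and stays silent until the next OKG.
  void OnConversationTurnFinished() override {
    input_->RecreateAudioInputStream(/*use_dsp=*/true);
  }

 private:
  AudioInputImpl* const input_;  // Not owned.
};

}  // namespace assistant
}  // namespace chromeos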

Bug: b/77916222
Test: locally build and run on eve
Change-Id: I4ce17fba21fbc584cde9178c2aca53a6ed410fe9
Reviewed-on: https://chromium-review.googlesource.com/c/1334566
Reviewed-by: Tao Wu <wutao@chromium.org>
Commit-Queue: Xiaohui Chen <xiaohuic@chromium.org>
Cr-Commit-Position: refs/heads/master@{#608864}
parent b2fa0306
@@ -27,6 +27,7 @@ source_set("lib") {
"//build/util:webkit_version",
"//chromeos",
"//chromeos/assistant:buildflags",
"//chromeos/services/assistant/public:feature_flags",
"//chromeos/services/assistant/public/mojom",
"//components/account_id",
"//services/device/public/mojom",
@@ -53,6 +54,8 @@ source_set("lib") {
"chromium_http_connection.h",
"default_url_request_context_getter.cc",
"default_url_request_context_getter.h",
"platform/audio_input_impl.cc",
"platform/audio_input_impl.h",
"platform/audio_input_provider_impl.cc",
"platform/audio_input_provider_impl.h",
"platform/audio_media_data_source.cc",
......
@@ -411,12 +411,6 @@ void AssistantManagerServiceImpl::OnConversationTurnStarted(bool is_mic_open) {
void AssistantManagerServiceImpl::OnConversationTurnFinished(
Resolution resolution) {
// TODO(updowndota): Find a better way to handle the edge cases.
if (resolution != Resolution::NORMAL_WITH_FOLLOW_ON &&
resolution != Resolution::CANCELLED &&
resolution != Resolution::BARGE_IN) {
platform_api_->SetMicState(false);
}
main_thread_task_runner_->PostTask(
FROM_HERE,
base::BindOnce(
@@ -938,13 +932,28 @@ void AssistantManagerServiceImpl::OnTimerSoundingFinished()
void AssistantManagerServiceImpl::OnConversationTurnStartedOnMainThread(
bool is_mic_open) {
platform_api_->GetAudioInputProvider()
.GetAudioInput()
.OnConversationTurnStarted();
interaction_subscribers_.ForAllPtrs([is_mic_open](auto* ptr) {
ptr->OnInteractionStarted(/*is_voice_interaction=*/is_mic_open);
});
}
void AssistantManagerServiceImpl::OnConversationTurnFinishedOnMainThread(
Resolution resolution) {
assistant_client::ConversationStateListener::Resolution resolution) {
// TODO(updowndota): Find a better way to handle the edge cases.
if (resolution != Resolution::NORMAL_WITH_FOLLOW_ON &&
resolution != Resolution::CANCELLED &&
resolution != Resolution::BARGE_IN) {
platform_api_->SetMicState(false);
}
platform_api_->GetAudioInputProvider()
.GetAudioInput()
.OnConversationTurnFinished();
switch (resolution) {
// Interaction ended normally.
case Resolution::NORMAL:
......
This diff is collapsed.
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMEOS_SERVICES_ASSISTANT_PLATFORM_AUDIO_INPUT_IMPL_H_
#define CHROMEOS_SERVICES_ASSISTANT_PLATFORM_AUDIO_INPUT_IMPL_H_
#include <memory>
#include <string>
#include <vector>
#include "base/macros.h"
#include "base/observer_list.h"
#include "base/sequence_checker.h"
#include "base/synchronization/lock.h"
#include "base/time/time.h"
#include "chromeos/services/assistant/public/mojom/assistant.mojom.h"
#include "libassistant/shared/public/platform_audio_input.h"
#include "media/base/audio_capturer_source.h"
#include "mojo/public/cpp/bindings/binding.h"
namespace service_manager {
class Connector;
} // namespace service_manager
namespace chromeos {
namespace assistant {
class AudioInputImpl : public assistant_client::AudioInput,
public media::AudioCapturerSource::CaptureCallback {
public:
explicit AudioInputImpl(service_manager::Connector* connector);
~AudioInputImpl() override;
class HotwordStateManager {
public:
HotwordStateManager() = default;
virtual ~HotwordStateManager() = default;
virtual void OnConversationTurnStarted() {}
virtual void OnConversationTurnFinished() {}
virtual void OnCaptureDataArrived() {}
private:
DISALLOW_COPY_AND_ASSIGN(HotwordStateManager);
};
// media::AudioCapturerSource::CaptureCallback overrides:
void Capture(const media::AudioBus* audio_source,
int audio_delay_milliseconds,
double volume,
bool key_pressed) override;
void OnCaptureError(const std::string& message) override;
void OnCaptureMuted(bool is_muted) override;
// assistant_client::AudioInput overrides. These functions are called by
// assistant from the assistant thread, for which we should not assume any
// //base related thread context to be in place.
assistant_client::BufferFormat GetFormat() const override;
void AddObserver(assistant_client::AudioInput::Observer* observer) override;
void RemoveObserver(
assistant_client::AudioInput::Observer* observer) override;
// Called when the mic state associated with the interaction is changed.
void SetMicState(bool mic_open);
void OnConversationTurnStarted();
void OnConversationTurnFinished();
// Called when the hotword enabled status changes.
void OnHotwordEnabled(bool enable);
void RecreateAudioInputStream(bool use_dsp);
private:
void StartRecording();
void StopRecording();
void UpdateRecordingState();
scoped_refptr<media::AudioCapturerSource> source_;
// Whether audio input should always be actively recording.
bool default_on_ = false;
// User explicitly requested to open microphone.
bool mic_open_ = false;
// Guards observers_.
base::Lock lock_;
std::vector<assistant_client::AudioInput::Observer*> observers_;
// This is the total number of frames captured during the lifetime of this
// object. We don't worry about overflow because this count is only used for
// logging purposes. If in the future this changes, we should re-evaluate.
int captured_frames_count_ = 0;
base::TimeTicks last_frame_count_report_time_;
// To be initialized on the assistant thread on the first call to AddObserver.
// It ensures that AddObserver / RemoveObserver are called on the same
// sequence.
SEQUENCE_CHECKER(observer_sequence_checker_);
service_manager::Connector* connector_;
scoped_refptr<base::SequencedTaskRunner> task_runner_;
std::unique_ptr<HotwordStateManager> state_manager_;
base::WeakPtrFactory<AudioInputImpl> weak_factory_;
DISALLOW_COPY_AND_ASSIGN(AudioInputImpl);
};
} // namespace assistant
} // namespace chromeos
#endif // CHROMEOS_SERVICES_ASSISTANT_PLATFORM_AUDIO_INPUT_IMPL_H_
@@ -4,204 +4,16 @@
#include "chromeos/services/assistant/platform/audio_input_provider_impl.h"
#include <utility>
#include "base/logging.h"
#include "base/stl_util.h"
#include "libassistant/shared/public/platform_audio_buffer.h"
#include "media/audio/audio_device_description.h"
#include "media/base/audio_parameters.h"
#include "media/base/audio_sample_types.h"
#include "media/base/channel_layout.h"
#include "services/audio/public/cpp/device_factory.h"
#include "services/service_manager/public/cpp/connector.h"
namespace chromeos {
namespace assistant {
namespace {
constexpr assistant_client::BufferFormat kFormat{
16000 /* sample_rate */, assistant_client::INTERLEAVED_S32, 1 /* channels */
};
} // namespace
AudioInputBufferImpl::AudioInputBufferImpl(const void* data,
uint32_t frame_count)
: data_(data), frame_count_(frame_count) {}
AudioInputBufferImpl::~AudioInputBufferImpl() = default;
assistant_client::BufferFormat AudioInputBufferImpl::GetFormat() const {
return kFormat;
}
const void* AudioInputBufferImpl::GetData() const {
return data_;
}
void* AudioInputBufferImpl::GetWritableData() {
NOTREACHED();
return nullptr;
}
int AudioInputBufferImpl::GetFrameCount() const {
return frame_count_;
}
AudioInputImpl::AudioInputImpl(
std::unique_ptr<service_manager::Connector> connector)
: source_(audio::CreateInputDevice(
std::move(connector),
media::AudioDeviceDescription::kDefaultDeviceId)),
task_runner_(base::ThreadTaskRunnerHandle::Get()),
weak_factory_(this) {
DETACH_FROM_SEQUENCE(observer_sequence_checker_);
// AUDIO_PCM_LINEAR and AUDIO_PCM_LOW_LATENCY are the same on CRAS.
source_->Initialize(
media::AudioParameters(
media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
media::CHANNEL_LAYOUT_MONO, kFormat.sample_rate,
kFormat.sample_rate / 10 /* buffer size for 100 ms */),
this);
}
AudioInputImpl::~AudioInputImpl() {
DCHECK(task_runner_->RunsTasksInCurrentSequence());
source_->Stop();
VLOG(1) << "Ending captured frames: " << captured_frames_count_;
}
void AudioInputImpl::Capture(const media::AudioBus* audio_source,
int audio_delay_milliseconds,
double volume,
bool key_pressed) {
DCHECK_EQ(kFormat.num_channels, audio_source->channels());
std::vector<int32_t> buffer(kFormat.num_channels * audio_source->frames());
audio_source->ToInterleaved<media::SignedInt32SampleTypeTraits>(
audio_source->frames(), buffer.data());
int64_t time = base::TimeTicks::Now().since_origin().InMilliseconds() -
audio_delay_milliseconds;
AudioInputBufferImpl input_buffer(buffer.data(), audio_source->frames());
{
base::AutoLock lock(lock_);
for (auto* observer : observers_)
observer->OnBufferAvailable(input_buffer, time);
}
captured_frames_count_ += audio_source->frames();
if (VLOG_IS_ON(1)) {
auto now = base::TimeTicks::Now();
if ((now - last_frame_count_report_time_) >
base::TimeDelta::FromMinutes(2)) {
VLOG(1) << "Captured frames: " << captured_frames_count_;
last_frame_count_report_time_ = now;
}
}
}
void AudioInputImpl::OnCaptureError(const std::string& message) {
LOG(ERROR) << "Capture error " << message;
base::AutoLock lock(lock_);
for (auto* observer : observers_)
observer->OnError(AudioInput::Error::FATAL_ERROR);
}
void AudioInputImpl::OnCaptureMuted(bool is_muted) {}
assistant_client::BufferFormat AudioInputImpl::GetFormat() const {
return kFormat;
}
void AudioInputImpl::AddObserver(
assistant_client::AudioInput::Observer* observer) {
DCHECK_CALLED_ON_VALID_SEQUENCE(observer_sequence_checker_);
VLOG(1) << "Add observer";
bool should_start = false;
{
base::AutoLock lock(lock_);
observers_.push_back(observer);
should_start = observers_.size() == 1;
}
if (default_on_ && should_start) {
// Post to main thread runner to start audio recording. Assistant thread
// does not have thread context defined in //base and will fail sequence
// check in AudioCapturerSource::Start().
task_runner_->PostTask(FROM_HERE,
base::BindOnce(&AudioInputImpl::StartRecording,
weak_factory_.GetWeakPtr()));
}
}
void AudioInputImpl::RemoveObserver(
assistant_client::AudioInput::Observer* observer) {
DCHECK_CALLED_ON_VALID_SEQUENCE(observer_sequence_checker_);
VLOG(1) << "Remove observer";
bool should_stop = false;
{
base::AutoLock lock(lock_);
base::Erase(observers_, observer);
should_stop = observers_.empty();
}
if (should_stop) {
task_runner_->PostTask(FROM_HERE,
base::BindOnce(&AudioInputImpl::StopRecording,
weak_factory_.GetWeakPtr()));
// Reset the sequence checker since assistant may call from different thread
// after restart.
DETACH_FROM_SEQUENCE(observer_sequence_checker_);
}
}
void AudioInputImpl::SetMicState(bool mic_open) {
if (!default_on_) {
if (mic_open) {
task_runner_->PostTask(FROM_HERE,
base::BindOnce(&AudioInputImpl::StartRecording,
weak_factory_.GetWeakPtr()));
} else {
task_runner_->PostTask(FROM_HERE,
base::BindOnce(&AudioInputImpl::StopRecording,
weak_factory_.GetWeakPtr()));
}
}
}
void AudioInputImpl::OnHotwordEnabled(bool enable) {
default_on_ = enable;
if (default_on_) {
task_runner_->PostTask(FROM_HERE,
base::BindOnce(&AudioInputImpl::StartRecording,
weak_factory_.GetWeakPtr()));
} else {
task_runner_->PostTask(FROM_HERE,
base::BindOnce(&AudioInputImpl::StopRecording,
weak_factory_.GetWeakPtr()));
}
}
void AudioInputImpl::StartRecording() {
DCHECK(task_runner_->RunsTasksInCurrentSequence());
VLOG(1) << "Start recording";
source_->Start();
}
void AudioInputImpl::StopRecording() {
DCHECK(task_runner_->RunsTasksInCurrentSequence());
VLOG(1) << "Stop recording";
source_->Stop();
}
AudioInputProviderImpl::AudioInputProviderImpl(
service_manager::Connector* connector)
: audio_input_(connector->Clone()) {}
: audio_input_(connector) {}
AudioInputProviderImpl::~AudioInputProviderImpl() = default;
assistant_client::AudioInput& AudioInputProviderImpl::GetAudioInput() {
AudioInputImpl& AudioInputProviderImpl::GetAudioInput() {
return audio_input_;
}
......
@@ -10,109 +10,23 @@
#include <vector>
#include "base/macros.h"
#include "base/observer_list.h"
#include "base/sequence_checker.h"
#include "base/synchronization/lock.h"
#include "base/time/time.h"
#include "chromeos/services/assistant/public/mojom/assistant.mojom.h"
#include "chromeos/services/assistant/platform/audio_input_impl.h"
#include "libassistant/shared/public/platform_audio_input.h"
#include "media/base/audio_capturer_source.h"
#include "mojo/public/cpp/bindings/binding.h"
namespace service_manager {
class Connector;
} // namespace service_manager
namespace media {
class AudioBus;
} // namespace media
namespace chromeos {
namespace assistant {
class AudioInputBufferImpl : public assistant_client::AudioBuffer {
public:
AudioInputBufferImpl(const void* data, uint32_t frame_count);
~AudioInputBufferImpl() override;
// assistant_client::AudioBuffer overrides:
assistant_client::BufferFormat GetFormat() const override;
const void* GetData() const override;
void* GetWritableData() override;
int GetFrameCount() const override;
private:
const void* data_;
int frame_count_;
DISALLOW_COPY_AND_ASSIGN(AudioInputBufferImpl);
};
class AudioInputImpl : public assistant_client::AudioInput,
public media::AudioCapturerSource::CaptureCallback {
public:
explicit AudioInputImpl(
std::unique_ptr<service_manager::Connector> connector);
~AudioInputImpl() override;
// media::AudioCapturerSource::CaptureCallback overrides:
void Capture(const media::AudioBus* audio_source,
int audio_delay_milliseconds,
double volume,
bool key_pressed) override;
void OnCaptureError(const std::string& message) override;
void OnCaptureMuted(bool is_muted) override;
// assistant_client::AudioInput overrides. These function are called by
// assistant from assistant thread, for which we should not assume any
// //base related thread context to be in place.
assistant_client::BufferFormat GetFormat() const override;
void AddObserver(assistant_client::AudioInput::Observer* observer) override;
void RemoveObserver(
assistant_client::AudioInput::Observer* observer) override;
// Called when the mic state associated with the interaction is changed.
void SetMicState(bool mic_open);
// Called when hotword enabled status changed.
void OnHotwordEnabled(bool enable);
private:
void StartRecording();
void StopRecording();
scoped_refptr<media::AudioCapturerSource> source_;
// Should audio input always recording actively.
bool default_on_ = false;
// Guards observers_;
base::Lock lock_;
std::vector<assistant_client::AudioInput::Observer*> observers_;
// This is the total number of frames captured during the life time of this
// object. We don't worry about overflow because this count is only used for
// logging purposes. If in the future this changes, we should re-evaluate.
int captured_frames_count_ = 0;
base::TimeTicks last_frame_count_report_time_;
// To be initialized on assistant thread the first call to AddObserver.
// It ensures that AddObserver / RemoveObserver are called on the same
// sequence.
SEQUENCE_CHECKER(observer_sequence_checker_);
scoped_refptr<base::SequencedTaskRunner> task_runner_;
base::WeakPtrFactory<AudioInputImpl> weak_factory_;
DISALLOW_COPY_AND_ASSIGN(AudioInputImpl);
};
class AudioInputProviderImpl : public assistant_client::AudioInputProvider {
public:
explicit AudioInputProviderImpl(service_manager::Connector* connector);
~AudioInputProviderImpl() override;
// assistant_client::AudioInputProvider overrides:
assistant_client::AudioInput& GetAudioInput() override;
AudioInputImpl& GetAudioInput() override;
int64_t GetCurrentAudioTime() override;
// Called when the mic state associated with the interaction is changed.
......
@@ -83,7 +83,7 @@ PlatformApiImpl::PlatformApiImpl(
PlatformApiImpl::~PlatformApiImpl() = default;
AudioInputProvider& PlatformApiImpl::GetAudioInputProvider() {
AudioInputProviderImpl& PlatformApiImpl::GetAudioInputProvider() {
return audio_input_provider_;
}
......
@@ -38,7 +38,7 @@ class PlatformApiImpl : public assistant_client::PlatformApi {
~PlatformApiImpl() override;
// assistant_client::PlatformApi overrides
assistant_client::AudioInputProvider& GetAudioInputProvider() override;
AudioInputProviderImpl& GetAudioInputProvider() override;
assistant_client::AudioOutputProvider& GetAudioOutputProvider() override;
assistant_client::AuthProvider& GetAuthProvider() override;
assistant_client::FileProvider& GetFileProvider() override;
......
@@ -4,14 +4,22 @@
#include "chromeos/services/assistant/public/features.h"
#include "base/feature_list.h"
namespace chromeos {
namespace assistant {
namespace features {
// Enables Assistant voice match enrollment.
const base::Feature kAssistantVoiceMatch{"AssistantVoiceMatch",
base::FEATURE_DISABLED_BY_DEFAULT};
const base::Feature kEnableDspHotword{"EnableDspHotword",
base::FEATURE_DISABLED_BY_DEFAULT};
bool IsDspHotwordEnabled() {
return base::FeatureList::IsEnabled(kEnableDspHotword);
}
} // namespace features
} // namespace assistant
} // namespace chromeos
@@ -11,8 +11,14 @@ namespace chromeos {
namespace assistant {
namespace features {
// Enables Assistant voice match enrollment.
extern const base::Feature kAssistantVoiceMatch;
// Enables DSP for hotword detection.
extern const base::Feature kEnableDspHotword;
bool IsDspHotwordEnabled();
} // namespace features
} // namespace assistant
} // namespace chromeos
......
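As a hedged usage sketch (not code from this CL), the new IsDspHotwordEnabled() helper would typically gate whether the audio input stream is re-opened as a DSP hotword device. MaybeArmDspHotword below is a hypothetical call site; it only uses RecreateAudioInputStream() and features::IsDspHotwordEnabled() as declared in the diffs above.

#include "chromeos/services/assistant/platform/audio_input_impl.h"
#include "chromeos/services/assistant/public/features.h"

namespace chromeos {
namespace assistant {

// Hypothetical helper: re-open the input stream as a DSP hotword device only
// when the EnableDspHotword feature is enabled; otherwise fall back to the
// regular (non-DSP) input device.
void MaybeArmDspHotword(AudioInputImpl* audio_input) {
  audio_input->RecreateAudioInputStream(
      /*use_dsp=*/features::IsDspHotwordEnabled());
}

}  // namespace assistant
}  // namespace chromeos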