Commit bab53dd2 authored by Rob Schonberger's avatar Rob Schonberger Committed by Chromium LUCI CQ

ml: Add Soda Recognizer client mojom to Chrome.

Adds in the soda speech recognizer client to chrome, talking to ml
service, with appropriate mojo bindings.

Add service connection bindings with appropriate fakes, and unit tests
for creation (along with appropriate expectations on creation for the
fake).

TEST=tested via the unit test /  compilation here.

Bug: 1152254
Change-Id: Ib39362e3aebef732efe4eaba29ca073653737a81
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2562924
Commit-Queue: Rob Schonberger <robsc@chromium.org>
Reviewed-by: Sam McNally <sammc@chromium.org>
Reviewed-by: Xiyuan Xia <xiyuan@chromium.org>
Reviewed-by: calamity <calamity@chromium.org>
Cr-Commit-Position: refs/heads/master@{#834990}
parent e1877c9a
......@@ -531,6 +531,7 @@
<include name="IDR_MACHINE_LEARNING_INTERNALS_MACHINE_LEARNING_SERVICE_MOJO_JS" file="${root_gen_dir}\chromeos\services\machine_learning\public\mojom\machine_learning_service.mojom-lite.js" use_base_dir="false" type="BINDATA" />
<include name="IDR_MACHINE_LEARNING_INTERNALS_MODEL_MOJO_JS" file="${root_gen_dir}\chromeos\services\machine_learning\public\mojom\model.mojom-lite.js" use_base_dir="false" type="BINDATA" />
<include name="IDR_MACHINE_LEARNING_INTERNALS_PAGE_HANDLER_MOJO_JS" file="${root_gen_dir}\chrome\browser\ui\webui\chromeos\machine_learning\machine_learning_internals_page_handler.mojom-lite.js" use_base_dir="false" type="BINDATA" />
<include name="IDR_MACHINE_LEARNING_INTERNALS_SODA_MOJO_JS" file="${root_gen_dir}\chromeos\services\machine_learning\public\mojom\soda.mojom-lite.js" use_base_dir="false" type="BINDATA" />
<include name="IDR_MACHINE_LEARNING_INTERNALS_TENSOR_MOJO_JS" file="${root_gen_dir}\chromeos\services\machine_learning\public\mojom\tensor.mojom-lite.js" use_base_dir="false" type="BINDATA" />
<include name="IDR_MACHINE_LEARNING_INTERNALS_TEST_MODEL_TAB_JS" file="resources\chromeos\machine_learning\test_model_tab.js" type="BINDATA" />
<include name="IDR_MACHINE_LEARNING_INTERNALS_TIME_MOJO_JS" file="${root_gen_dir}\mojo/public/mojom/base/time.mojom-lite.js" use_base_dir="false" type="BINDATA" />
......
......@@ -18,6 +18,7 @@
<script src="mojo/public/mojom/base/time.mojom-lite.js"></script>
<script src="chromeos/services/machine_learning/public/mojom/handwriting_recognizer.mojom-lite.js"></script>
<script src="chromeos/services/machine_learning/public/mojom/model.mojom-lite.js"></script>
<script src="chromeos/services/machine_learning/public/mojom/soda.mojom-lite.js"></script>
<script src="chromeos/services/machine_learning/public/mojom/machine_learning_service.mojom-lite.js"></script>
<script src="chrome/browser/ui/webui/chromeos/machine_learning/machine_learning_internals_page_handler.mojom-lite.js"></script>
</head>
......
......@@ -43,6 +43,9 @@ MachineLearningInternalsUI::MachineLearningInternalsUI(
{IDR_MACHINE_LEARNING_INTERNALS_PAGE_HANDLER_MOJO_JS,
"chrome/browser/ui/webui/chromeos/machine_learning/"
"machine_learning_internals_page_handler.mojom-lite.js"},
{IDR_MACHINE_LEARNING_INTERNALS_SODA_MOJO_JS,
"chromeos/services/machine_learning/public/mojom/"
"soda.mojom-lite.js"},
{IDR_MACHINE_LEARNING_INTERNALS_TENSOR_MOJO_JS,
"chromeos/services/machine_learning/public/mojom/tensor.mojom-lite.js"},
......
......@@ -17,6 +17,7 @@ FakeServiceConnectionImpl::FakeServiceConnectionImpl()
load_handwriting_model_result_(mojom::LoadHandwritingModelResult::OK),
load_model_result_(mojom::LoadModelResult::OK),
load_text_classifier_result_(mojom::LoadModelResult::OK),
load_soda_result_(mojom::LoadModelResult::OK),
create_graph_executor_result_(mojom::CreateGraphExecutorResult::OK),
execute_result_(mojom::ExecuteResult::OK),
async_mode_(false) {}
......@@ -91,6 +92,16 @@ void FakeServiceConnectionImpl::LoadGrammarChecker(
&FakeServiceConnectionImpl::HandleLoadGrammarChecker,
base::Unretained(this), std::move(receiver), std::move(callback)));
}
// Fake implementation of ServiceConnection::LoadSpeechRecognizer.
// |soda_config| is not used by the fake; the client remote, the recognizer
// receiver and the callback are bound to HandleLoadSpeechRecognizer() and the
// resulting closure is handed to ScheduleCall().
void FakeServiceConnectionImpl::LoadSpeechRecognizer(
    mojom::SodaConfigPtr soda_config,
    mojo::PendingRemote<mojom::SodaClient> soda_client,
    mojo::PendingReceiver<mojom::SodaRecognizer> soda_recognizer,
    mojom::MachineLearningService::LoadSpeechRecognizerCallback callback) {
  base::OnceClosure load_task = base::BindOnce(
      &FakeServiceConnectionImpl::HandleLoadSpeechRecognizer,
      base::Unretained(this), std::move(soda_client),
      std::move(soda_recognizer), std::move(callback));
  ScheduleCall(std::move(load_task));
}
void FakeServiceConnectionImpl::Execute(
base::flat_map<std::string, mojom::TensorPtr> inputs,
......@@ -304,13 +315,37 @@ void FakeServiceConnectionImpl::Check(
&FakeServiceConnectionImpl::HandleGrammarCheckerQuery,
base::Unretained(this), std::move(query), std::move(callback)));
}
// Handler bound by Stop() and also called directly by HandleMarkDone().
// The body is currently an empty placeholder.
void FakeServiceConnectionImpl::HandleStop() {
// Placeholder: do something on the client.
}
// Handler bound by Start(). The body is currently an empty placeholder.
void FakeServiceConnectionImpl::HandleStart() {
// Placeholder: do something on the client.
}
// Handler bound by MarkDone(). In this fake, marking the audio as done is
// treated the same as stopping: it simply delegates to HandleStop().
void FakeServiceConnectionImpl::HandleMarkDone() {
HandleStop();
}
// mojom::SodaRecognizer: the fake ignores |audio| and does nothing.
void FakeServiceConnectionImpl::AddAudio(const std::vector<uint8_t>& audio) {}
void FakeServiceConnectionImpl::Stop() {
ScheduleCall(base::BindOnce(&FakeServiceConnectionImpl::HandleStop,
base::Unretained(this)));
}
void FakeServiceConnectionImpl::Start() {
ScheduleCall(base::BindOnce(&FakeServiceConnectionImpl::HandleStart,
base::Unretained(this)));
}
void FakeServiceConnectionImpl::MarkDone() {
ScheduleCall(base::BindOnce(&FakeServiceConnectionImpl::HandleMarkDone,
base::Unretained(this)));
}
// Completes a fake handwriting-model load. |receiver| is added to
// |handwriting_receivers_| only when the configured result is OK; |callback|
// is always run with the configured result.
void FakeServiceConnectionImpl::HandleLoadHandwritingModel(
    mojo::PendingReceiver<mojom::HandwritingRecognizer> receiver,
    mojom::MachineLearningService::LoadHandwritingModelCallback callback) {
  const bool load_succeeds =
      load_handwriting_model_result_ == mojom::LoadHandwritingModelResult::OK;
  if (load_succeeds) {
    handwriting_receivers_.Add(this, std::move(receiver));
  }
  std::move(callback).Run(load_handwriting_model_result_);
}
......@@ -338,6 +373,16 @@ void FakeServiceConnectionImpl::HandleLoadGrammarChecker(
std::move(callback).Run(load_model_result_);
}
// Completes a fake SODA recognizer load. When the configured result is OK, the
// recognizer receiver is added to |soda_recognizer_receivers_| and the client
// remote to |soda_client_remotes_|; otherwise neither endpoint is kept.
// |callback| is always run with the configured result.
void FakeServiceConnectionImpl::HandleLoadSpeechRecognizer(
    mojo::PendingRemote<mojom::SodaClient> soda_client,
    mojo::PendingReceiver<mojom::SodaRecognizer> soda_recognizer,
    mojom::MachineLearningService::LoadSpeechRecognizerCallback callback) {
  // Failure path: report the configured error without keeping any endpoint.
  if (load_soda_result_ != mojom::LoadModelResult::OK) {
    std::move(callback).Run(load_soda_result_);
    return;
  }

  soda_recognizer_receivers_.Add(this, std::move(soda_recognizer));
  soda_client_remotes_.Add(std::move(soda_client));
  std::move(callback).Run(load_soda_result_);
}
void FakeServiceConnectionImpl::HandleGrammarCheckerQuery(
mojom::GrammarCheckerQueryPtr query,
......
......@@ -19,6 +19,7 @@
#include "chromeos/services/machine_learning/public/mojom/text_classifier.mojom.h"
#include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/receiver_set.h"
#include "mojo/public/cpp/bindings/remote_set.h"
namespace chromeos {
namespace machine_learning {
......@@ -37,7 +38,8 @@ class FakeServiceConnectionImpl : public ServiceConnection,
public mojom::TextClassifier,
public mojom::HandwritingRecognizer,
public mojom::GrammarChecker,
public mojom::GraphExecutor {
public mojom::GraphExecutor,
public mojom::SodaRecognizer {
public:
FakeServiceConnectionImpl();
~FakeServiceConnectionImpl() override;
......@@ -78,6 +80,13 @@ class FakeServiceConnectionImpl : public ServiceConnection,
mojom::MachineLearningService::LoadGrammarCheckerCallback callback)
override;
void LoadSpeechRecognizer(
mojom::SodaConfigPtr soda_config,
mojo::PendingRemote<mojom::SodaClient> soda_client,
mojo::PendingReceiver<mojom::SodaRecognizer> soda_recognizer,
mojom::MachineLearningService::LoadSpeechRecognizerCallback callback)
override;
// mojom::Model:
void CreateGraphExecutor(
mojo::PendingReceiver<mojom::GraphExecutor> receiver,
......@@ -166,6 +175,12 @@ class FakeServiceConnectionImpl : public ServiceConnection,
void Check(mojom::GrammarCheckerQueryPtr query,
mojom::GrammarChecker::CheckCallback callback) override;
// mojom::SodaRecognizer:
void AddAudio(const std::vector<uint8_t>& audio) override;
void Stop() override;
void Start() override;
void MarkDone() override;
private:
void ScheduleCall(base::OnceClosure call);
void HandleLoadBuiltinModelCall(
......@@ -204,16 +219,27 @@ class FakeServiceConnectionImpl : public ServiceConnection,
mojom::MachineLearningService::LoadGrammarCheckerCallback callback);
void HandleGrammarCheckerQuery(mojom::GrammarCheckerQueryPtr query,
mojom::GrammarChecker::CheckCallback callback);
void HandleLoadSpeechRecognizer(
mojo::PendingRemote<mojom::SodaClient> soda_client,
mojo::PendingReceiver<mojom::SodaRecognizer> soda_recognizer,
mojom::MachineLearningService::LoadSpeechRecognizerCallback callback);
void HandleStop();
void HandleStart();
void HandleMarkDone();
mojo::ReceiverSet<mojom::Model> model_receivers_;
mojo::ReceiverSet<mojom::GraphExecutor> graph_receivers_;
mojo::ReceiverSet<mojom::TextClassifier> text_classifier_receivers_;
mojo::ReceiverSet<mojom::HandwritingRecognizer> handwriting_receivers_;
mojo::ReceiverSet<mojom::GrammarChecker> grammar_checker_receivers_;
mojo::ReceiverSet<mojom::SodaRecognizer> soda_recognizer_receivers_;
mojo::RemoteSet<mojom::SodaClient> soda_client_remotes_;
mojom::TensorPtr output_tensor_;
mojom::LoadHandwritingModelResult load_handwriting_model_result_;
mojom::LoadModelResult load_model_result_;
mojom::LoadModelResult load_text_classifier_result_;
mojom::LoadModelResult load_soda_result_;
mojom::CreateGraphExecutorResult create_graph_executor_result_;
mojom::ExecuteResult execute_result_;
std::vector<mojom::TextAnnotationPtr> annotate_result_;
......
......@@ -61,6 +61,13 @@ class ServiceConnectionImpl : public ServiceConnection {
mojom::MachineLearningService::LoadGrammarCheckerCallback result_callback)
override;
void LoadSpeechRecognizer(
mojom::SodaConfigPtr soda_config,
mojo::PendingRemote<mojom::SodaClient> soda_client,
mojo::PendingReceiver<mojom::SodaRecognizer> soda_recognizer,
mojom::MachineLearningService::LoadSpeechRecognizerCallback callback)
override;
private:
// Binds the top level interface |machine_learning_service_| to an
// implementation in the ML Service daemon, if it is not already bound. The
......@@ -142,6 +149,18 @@ void ServiceConnectionImpl::LoadGrammarChecker(
std::move(result_callback));
}
// Forwards a SODA speech recognizer load request to
// |machine_learning_service_|, handing over ownership of |soda_config|,
// |soda_client|, |soda_recognizer| and |callback|.
// Must be called on the sequence checked by |sequence_checker_|.
void ServiceConnectionImpl::LoadSpeechRecognizer(
mojom::SodaConfigPtr soda_config,
mojo::PendingRemote<mojom::SodaClient> soda_client,
mojo::PendingReceiver<mojom::SodaRecognizer> soda_recognizer,
mojom::MachineLearningService::LoadSpeechRecognizerCallback callback) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// Make sure the top-level service interface is bound before it is used.
BindMachineLearningServiceIfNeeded();
machine_learning_service_->LoadSpeechRecognizer(
std::move(soda_config), std::move(soda_client),
std::move(soda_recognizer), std::move(callback));
}
void ServiceConnectionImpl::BindMachineLearningServiceIfNeeded() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (machine_learning_service_) {
......
......@@ -94,6 +94,13 @@ class ServiceConnection {
mojom::MachineLearningService::LoadGrammarCheckerCallback
result_callback) = 0;
// Instruct ML daemon to load a SODA model.
virtual void LoadSpeechRecognizer(
mojom::SodaConfigPtr soda_config,
mojo::PendingRemote<mojom::SodaClient> soda_client,
mojo::PendingReceiver<mojom::SodaRecognizer> soda_recognizer,
mojom::MachineLearningService::LoadSpeechRecognizerCallback callback) = 0;
protected:
ServiceConnection() = default;
virtual ~ServiceConnection() {}
......
......@@ -11,6 +11,7 @@
#include "base/macros.h"
#include "base/message_loop/message_pump_type.h"
#include "base/run_loop.h"
#include "base/test/bind.h"
#include "base/test/task_environment.h"
#include "base/threading/thread.h"
#include "chromeos/dbus/machine_learning/machine_learning_client.h"
......@@ -93,6 +94,32 @@ TEST_F(ServiceConnectionTest, LoadHandwritingModelWithSpec) {
base::BindOnce([](mojom::LoadModelResult result) {}));
}
// Minimal mojom::SodaClient implementation with no overrides; it only exists
// so that a mojo::Receiver<mojom::SodaClient> has an object to bind to.
class TestSodaClient : public mojom::SodaClient {};
// Tests that LoadSpeechRecognizer runs OK without a crash in a basic Mojo
// environment, and that the load callback is run with LoadModelResult::OK.
TEST_F(ServiceConnectionTest, LoadSpeechRecognizerAndCallback) {
mojo::Remote<mojom::SodaRecognizer> soda_recognizer;
TestSodaClient test_client;
// Register the fake service connection for use in this test.
FakeServiceConnectionImpl fake_service_connection;
ServiceConnection::UseFakeServiceConnectionForTesting(
&fake_service_connection);
mojo::Receiver<mojom::SodaClient> soda_client{&test_client};
bool callback_done = false;
auto config = mojom::SodaConfig::New();
base::RunLoop run_loop;
ServiceConnection::GetInstance()->LoadSpeechRecognizer(
std::move(config), soda_client.BindNewPipeAndPassRemote(),
soda_recognizer.BindNewPipeAndPassReceiver(),
base::BindLambdaForTesting([&](mojom::LoadModelResult result) {
callback_done = true;
EXPECT_EQ(result, mojom::LoadModelResult::OK);
// Unblock run_loop.Run() below once the result has been checked.
run_loop.Quit();
}));
run_loop.Run();
// Guards against the run loop exiting without the callback having run.
ASSERT_TRUE(callback_done);
}
// Tests that LoadGrammarChecker runs OK (no crash) in a basic Mojo environment.
TEST_F(ServiceConnectionTest, LoadGrammarModel) {
mojo::Remote<mojom::GrammarChecker> grammar_checker;
......
......@@ -12,6 +12,7 @@ mojom("mojom") {
"handwriting_recognizer_requestor.mojom",
"machine_learning_service.mojom",
"model.mojom",
"soda.mojom",
"tensor.mojom",
"text_classifier.mojom",
]
......
......@@ -21,6 +21,7 @@ module chromeos.machine_learning.mojom;
import "chromeos/services/machine_learning/public/mojom/grammar_checker.mojom";
import "chromeos/services/machine_learning/public/mojom/handwriting_recognizer.mojom";
import "chromeos/services/machine_learning/public/mojom/model.mojom";
import "chromeos/services/machine_learning/public/mojom/soda.mojom";
import "chromeos/services/machine_learning/public/mojom/text_classifier.mojom";
// These values are persisted to logs. Entries should not be renumbered and
......@@ -61,6 +62,11 @@ interface MachineLearningService {
HandwritingRecognizerSpec spec,
pending_receiver<HandwritingRecognizer> receiver)
=> (LoadModelResult result);
// Create and initialize a speech recognizer with given `config`.
LoadSpeechRecognizer@6(SodaConfig config,
pending_remote<SodaClient> soda_client,
pending_receiver<SodaRecognizer> soda_recognizer)
=> (LoadModelResult result);
// Create and initialize a grammar checker.
LoadGrammarChecker@7(pending_receiver<GrammarChecker> receiver)
=> (LoadModelResult result);
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Datatypes and interfaces of speech recognition API.
// NOTE: This mojom exists in two places and must be kept in sync:
// Chromium: //chromeos/services/machine_learning/public/mojom/
// Chrome OS: src/platform2/ml/mojom/
// Note: Other repos downstream of Chromium might also use this mojom.
// Example: A backwards-compatible mojom change (and corresponding
// implementation change) can be made in Chrome OS first, then replicated to the
// clients (Chromium, other downstream repos) later.
// Use //chromeos/services/machine_learning/public/mojom/roll_mojom.sh to help
// replicate Chrome OS-side changes over to Chromium.
module chromeos.machine_learning.mojom;
import "mojo/public/mojom/base/time.mojom";
// The configuration used to load the SODA recognizer.
struct SodaConfig {
// Number of channels of the audio that will be sent to the SODA recognizer.
uint32 channel_count;
// Sample rate of the audio that will be sent to the SODA recognizer.
uint32 sample_rate;
// The API key for the SODA library.
string api_key;
// Load path to find the SODA content.
string load_path;
// File to load as the chrome-SODA library. Optional.
string? api_lib_location;
};
// The kind of event reported by the endpointer.
enum EndpointerType {
// Speech detected.
START_OF_SPEECH,
// End of speech detected, but audio continues.
END_OF_SPEECH,
// Audio is terminated.
END_OF_AUDIO,
// Query is terminated.
END_OF_UTTERANCE
};
// Common information about the timing of reported SODA events.
struct TimingInfo {
// Epoch time of the first audio buffer of the main query that is fed into
// ASR. This is the wall time read from the system clock when the first audio
// buffer is received by the terse processor.
mojo_base.mojom.Time audio_start_epoch;
// Start time in audio time from the start of the SODA session.
// This time measures the amount of audio input into SODA.
mojo_base.mojom.TimeDelta audio_start_time;
// Elapsed wall time usec since the first frame.
mojo_base.mojom.TimeDelta elapsed_wall_time;
// Elapsed processed audio usec from first frame after preamble.
mojo_base.mojom.TimeDelta event_end_time;
// On device benchmark latency as defined in go/asr-latency-metrics.
mojo_base.mojom.TimeDelta latency;
// On device counterpart of E2E normalized latency as defined in
// go/asr-latency-metrics. This metric is mainly for non-continuous
// conversation.
float normalized_latency;
// Timing for each word as an offset from |audio_start_time|.
array<mojo_base.mojom.TimeDelta> word_alignments;
};
// Start/end events.
struct EndpointerEvent {
// Which kind of endpointer event this is.
EndpointerType endpointer_type;
// Timing information for this event. Optional.
TimingInfo? timing_event;
};
// A result _during_ a recognition. Could change at any time with the
// next partial or the final recognition for this chunk.
struct PartialResult {
// Hypotheses so far. The first is the most likely, followed by others.
// Note: the relationship between the first and the other hypotheses is not
// guaranteed in any way.
array<string> partial_text;
// Timing information for this result. Optional.
TimingInfo? timing_event;
};
// The reason an endpoint was reached. Carried in FinalResult.endpoint_reason.
enum EndpointReason {
// Default value, unknown reason.
ENDPOINT_UNKNOWN,
// Due to end_of_speech detection by endpointer.
ENDPOINT_END_OF_SPEECH,
// Due to end_of_utterance detection by endpointer.
ENDPOINT_END_OF_UTTERANCE,
// Due to the end of mic audio. This could be due to a mic event or SODA
// being stopped.
ENDPOINT_END_OF_AUDIO,
};
// A final recognition result, as opposed to a PartialResult.
struct FinalResult {
// Sorted in decreasing order of probability.
array<string> final_hypotheses;
// Why this result was finalized.
EndpointReason endpoint_reason;
// Timing information for this result. Optional.
TimingInfo? timing_event;
};
// Frequent event from the recognizer, sent for almost every frame. Gives an
// indication of speechiness and audio level.
struct AudioLevelEvent {
// RMS audio level, from PowerEvaluator. Score is [0, 1).
float rms;
// Speech likelihood score, from TerseProcessor. Score is [0, 1).
float audio_level;
};
// This essentially mirrors the subset of SODA's SodaEvent proto we will
// support. Exactly one of the event kinds below is set.
union SpeechRecognizerEvent {
AudioLevelEvent audio_event;
PartialResult partial_result;
EndpointerEvent endpointer_event;
FinalResult final_result;
};
// This interface is called upon by the SodaRecognizer. Implemented by
// the client, SODA then calls these as 'events' with appropriate details
// when recognition occurs.
// No methods are declared on this interface yet.
interface SodaClient {
};
// The mojom interface for performing speech recognition with SODA.
// NOTE(review): the comments below refer to SodaClient::OnStop and
// SodaClient::OnStart, which the SodaClient interface in this file does not
// declare yet - confirm when those callbacks are added.
interface SodaRecognizer {
// Add audio for speech recognition.
AddAudio@0(array<uint8> audio);
// Instruct SODA to stop processing immediately. Stopping is
// confirmed when SodaClient::OnStop is called back.
Stop@1();
// Instruct SODA to start processing. No-op if already
// processing. When stopped, causes a SodaClient::OnStart callback.
Start@2();
// Instruct SODA to stop processing after all queued audio is
// processed. Will eventually result in a SodaClient::OnStop, but only
// after all audio currently in queue is decoded.
MarkDone@3();
};
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment