Commit 630c7ae4 authored by evliu, committed by Commit Bot

Reset the Cloud recognition stream if audio has been paused for 30 seconds

This CL resets the Cloud recognition stream if audio has been paused
for more than 28 seconds. That threshold sits just under the service limit:
the Open Speech API will not return recognition events if more than
30 seconds have elapsed since the last audio upload.

Bug: 1101566
Change-Id: I9a667358fc8d5b98faaee3922d1d9778ab7a40f3
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2278533
Commit-Queue: Evan Liu <evliu@google.com>
Reviewed-by: Dale Curtis <dalecurtis@chromium.org>
Cr-Commit-Position: refs/heads/master@{#786506}
parent 9bc4c422
...@@ -22,9 +22,14 @@ namespace speech { ...@@ -22,9 +22,14 @@ namespace speech {
// The maximum duration a stream can be open for. The Open Speech API supports 5 // The maximum duration a stream can be open for. The Open Speech API supports 5
// minutes of continuous recognition. // minutes of continuous recognition.
constexpr base::TimeDelta kStreamResetDuration = constexpr base::TimeDelta kMaximumStreamDuration =
base::TimeDelta::FromSeconds(295); base::TimeDelta::FromSeconds(295);
// The Open Speech API will not return any recognition events if 30 seconds have
// elapsed since the last audio upload.
constexpr base::TimeDelta kMaximumPauseDuration =
base::TimeDelta::FromSeconds(28);
constexpr char kWebServiceBaseUrl[] = constexpr char kWebServiceBaseUrl[] =
"https://www.google.com/speech-api/full-duplex/v1"; "https://www.google.com/speech-api/full-duplex/v1";
constexpr char kDownstreamUrl[] = "/down"; constexpr char kDownstreamUrl[] = "/down";
...@@ -108,12 +113,12 @@ void CloudSpeechRecognitionClient::OnDownstreamDataReceived( ...@@ -108,12 +113,12 @@ void CloudSpeechRecognitionClient::OnDownstreamDataReceived(
void CloudSpeechRecognitionClient::Reset() { void CloudSpeechRecognitionClient::Reset() {
DCHECK(is_initialized_); DCHECK(is_initialized_);
// Return if the URL loader factory has not been set. // Return if the URL loader factory has not been set.
if (!url_loader_factory_) if (!url_loader_factory_)
return; return;
last_reset_ = base::Time::Now(); last_reset_ = base::TimeTicks::Now();
last_upload_ = base::TimeTicks::Now();
const std::string request_key = base::UnguessableToken::Create().ToString(); const std::string request_key = base::UnguessableToken::Create().ToString();
// Setup downstream fetcher. // Setup downstream fetcher.
...@@ -190,10 +195,13 @@ void CloudSpeechRecognitionClient::Reset() { ...@@ -190,10 +195,13 @@ void CloudSpeechRecognitionClient::Reset() {
void CloudSpeechRecognitionClient::AddAudio(base::span<const char> chunk) { void CloudSpeechRecognitionClient::AddAudio(base::span<const char> chunk) {
DCHECK(is_initialized_); DCHECK(is_initialized_);
if (base::Time::Now() - last_reset_ > kStreamResetDuration) { base::TimeTicks now = base::TimeTicks::Now();
if (now - last_reset_ > kMaximumStreamDuration ||
now - last_upload_ > kMaximumPauseDuration) {
Reset(); Reset();
} }
last_upload_ = now;
upstream_loader_->AppendChunkToUpload(std::string(chunk.data(), chunk.size()), upstream_loader_->AppendChunkToUpload(std::string(chunk.data(), chunk.size()),
false); false);
} }
......
...@@ -90,7 +90,10 @@ class CloudSpeechRecognitionClient : public speech::UpstreamLoaderClient, ...@@ -90,7 +90,10 @@ class CloudSpeechRecognitionClient : public speech::UpstreamLoaderClient,
std::string previous_result_; std::string previous_result_;
// Stores the last time the stream was reset. // Stores the last time the stream was reset.
base::Time last_reset_; base::TimeTicks last_reset_;
// Stores the last time audio was uploaded.
base::TimeTicks last_upload_;
OnRecognitionEventCallback recognition_event_callback_; OnRecognitionEventCallback recognition_event_callback_;
......
...@@ -477,8 +477,38 @@ TEST_F(CloudSpeechRecognitionClientUnitTest, StreamReset) { ...@@ -477,8 +477,38 @@ TEST_F(CloudSpeechRecognitionClientUnitTest, StreamReset) {
std::string DownloadUrlBeforeReset = std::string DownloadUrlBeforeReset =
GetDownstreamRequest()->request.url.spec(); GetDownstreamRequest()->request.url.spec();
// Fast forward by 325 seconds to trigger a reset. // Fast forward by 325 total seconds to trigger a reset.
task_environment_.FastForwardBy(base::TimeDelta::FromSeconds(325)); for (int i = 0; i < 13; i++) {
InjectDummyAudio();
task_environment_.FastForwardBy(base::TimeDelta::FromSeconds(25));
}
ASSERT_EQ(2, speech_recognition_service_impl_->GetNumPending());
std::string UploadUrlAfterReset = GetUpstreamRequest()->request.url.spec();
std::string DownloadUrlAfterReset =
GetDownstreamRequest()->request.url.spec();
// The URLs after the reset should contain a different request key.
ASSERT_NE(UploadUrlBeforeReset, UploadUrlAfterReset);
ASSERT_NE(DownloadUrlBeforeReset, DownloadUrlAfterReset);
}
// Verifies that the stream is reset if the audio is paused for longer than
// kMaximumPauseDuration (28 seconds, just under the API's 30-second limit).
TEST_F(CloudSpeechRecognitionClientUnitTest, StreamResetAfterPause) {
ASSERT_TRUE(client_under_test_->IsInitialized());
ASSERT_TRUE(GetUpstreamRequest());
ASSERT_TRUE(GetDownstreamRequest());
ASSERT_EQ("", ConsumeChunkedUploadData(0));
InjectDummyAudio();
std::string UploadUrlBeforeReset = GetUpstreamRequest()->request.url.spec();
std::string DownloadUrlBeforeReset =
GetDownstreamRequest()->request.url.spec();
// Fast forward by 35 seconds to trigger a reset.
task_environment_.FastForwardBy(base::TimeDelta::FromSeconds(35));
InjectDummyAudio(); InjectDummyAudio();
ASSERT_EQ(2, speech_recognition_service_impl_->GetNumPending()); ASSERT_EQ(2, speech_recognition_service_impl_->GetNumPending());
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment