Commit 5fe33cf0 authored by David Tseng, committed by Chromium LUCI CQ

Define text_to_speech watchlist

R=dmazzoni@chromium.org

Change-Id: Ic8c51de3c4ac96cdcc496842a9a3698a976778e6
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2577543
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Commit-Queue: Dominic Mazzoni <dmazzoni@chromium.org>
Commit-Queue: David Tseng <dtseng@chromium.org>
Auto-Submit: David Tseng <dtseng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#834339}
parent 87d9c99c
...@@ -147,13 +147,20 @@ void GoogleTtsStream::ReadMoreFrames(bool is_first_buffer) { ...@@ -147,13 +147,20 @@ void GoogleTtsStream::ReadMoreFrames(bool is_first_buffer) {
buf.frames.resize(frames_in_buf); buf.frames.resize(frames_in_buf);
buf.char_index = -1; buf.char_index = -1;
if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
buf.char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
buf.is_first_buffer = is_first_buffer; buf.is_first_buffer = is_first_buffer;
owner_->AddAudioBuffer(std::move(buf)); owner_->AddAudioBuffer(std::move(buf));
for (size_t timepoint_index = 0;
timepoint_index < libchrometts_.GoogleTtsGetTimepointsCount();
timepoint_index++) {
owner_->AddExplicitTimepoint(
libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(timepoint_index),
base::TimeDelta::FromSecondsD(
libchrometts_.GoogleTtsGetTimepointsTimeInSecsAtIndex(
timepoint_index)));
}
if (status <= 0) if (status <= 0)
return; return;
......
...@@ -87,6 +87,11 @@ void TtsService::AddAudioBuffer(AudioBuffer buf) { ...@@ -87,6 +87,11 @@ void TtsService::AddAudioBuffer(AudioBuffer buf) {
buffers_.emplace_back(std::move(buf)); buffers_.emplace_back(std::move(buf));
} }
// Queues an explicit timepoint at the back of |timepoints_|: |char_index| is
// the character index to report and |delay| is its offset from the start of
// playback. |state_lock_| is held for the append because |timepoints_| is
// guarded by it.
void TtsService::AddExplicitTimepoint(int char_index, base::TimeDelta delay) {
  base::AutoLock scoped_state_lock(state_lock_);
  timepoints_.emplace_back(char_index, delay);
}
void TtsService::Stop() { void TtsService::Stop() {
base::AutoLock al(state_lock_); base::AutoLock al(state_lock_);
StopLocked(); StopLocked();
...@@ -118,10 +123,9 @@ int TtsService::Render(base::TimeDelta delay, ...@@ -118,10 +123,9 @@ int TtsService::Render(base::TimeDelta delay,
media::AudioBus* dest) { media::AudioBus* dest) {
size_t frames_in_buf = 0; size_t frames_in_buf = 0;
int32_t status = -1; int32_t status = -1;
int char_index = -1;
bool is_first_buffer = false;
{ {
base::AutoLock al(state_lock_); base::AutoLock al(state_lock_);
if (buffers_.empty()) if (buffers_.empty())
return 0; return 0;
...@@ -139,25 +143,32 @@ int TtsService::Render(base::TimeDelta delay, ...@@ -139,25 +143,32 @@ int TtsService::Render(base::TimeDelta delay,
return 0; return 0;
} }
char_index = buf.char_index; if (buf.is_first_buffer) {
is_first_buffer = buf.is_first_buffer; start_playback_time_ = base::Time::Now();
const float* frames = &buf.frames[0]; tts_event_observer_->OnStart();
}
// Implied timepoint.
if (buf.char_index != -1)
tts_event_observer_->OnTimepoint(buf.char_index);
// Explicit timepoint(s).
base::TimeDelta start_to_now = base::Time::Now() - start_playback_time_;
while (!timepoints_.empty() && timepoints_.front().second <= start_to_now) {
tts_event_observer_->OnTimepoint(timepoints_.front().first);
timepoints_.pop_front();
}
frames_in_buf = buf.frames.size(); frames_in_buf = buf.frames.size();
const float* frames = nullptr;
if (!buf.frames.empty())
frames = &buf.frames[0];
float* channel = dest->channel(0); float* channel = dest->channel(0);
for (size_t i = 0; i < frames_in_buf; i++) for (size_t i = 0; i < frames_in_buf; i++)
channel[i] = frames[i]; channel[i] = frames[i];
buffers_.pop_front(); buffers_.pop_front();
} }
if (is_first_buffer)
tts_event_observer_->OnStart();
if (frames_in_buf == 0)
return 0;
if (char_index != -1)
tts_event_observer_->OnTimepoint(char_index);
return frames_in_buf; return frames_in_buf;
} }
...@@ -165,8 +176,10 @@ void TtsService::OnRenderError() {} ...@@ -165,8 +176,10 @@ void TtsService::OnRenderError() {}
void TtsService::StopLocked(bool clear_buffers) { void TtsService::StopLocked(bool clear_buffers) {
output_device_->Pause(); output_device_->Pause();
if (clear_buffers) if (clear_buffers) {
buffers_.clear(); buffers_.clear();
timepoints_.clear();
}
} }
void TtsService::ProcessPendingTtsStreamFactories() { void TtsService::ProcessPendingTtsStreamFactories() {
......
...@@ -47,6 +47,7 @@ class TtsService : public mojom::TtsService, ...@@ -47,6 +47,7 @@ class TtsService : public mojom::TtsService,
base::OnceCallback<void(::mojo::PendingReceiver<mojom::TtsEventObserver>)> base::OnceCallback<void(::mojo::PendingReceiver<mojom::TtsEventObserver>)>
callback); callback);
void AddAudioBuffer(AudioBuffer buf); void AddAudioBuffer(AudioBuffer buf);
void AddExplicitTimepoint(int char_index, base::TimeDelta delay);
void Stop(); void Stop();
void SetVolume(float volume); void SetVolume(float volume);
void Pause(); void Pause();
...@@ -64,7 +65,6 @@ class TtsService : public mojom::TtsService, ...@@ -64,7 +65,6 @@ class TtsService : public mojom::TtsService,
return pending_tts_stream_factory_receivers_; return pending_tts_stream_factory_receivers_;
} }
private:
// mojom::TtsService: // mojom::TtsService:
void BindTtsStreamFactory( void BindTtsStreamFactory(
mojo::PendingReceiver<mojom::TtsStreamFactory> receiver, mojo::PendingReceiver<mojom::TtsStreamFactory> receiver,
...@@ -82,6 +82,7 @@ class TtsService : public mojom::TtsService, ...@@ -82,6 +82,7 @@ class TtsService : public mojom::TtsService,
media::AudioBus* dest) override; media::AudioBus* dest) override;
void OnRenderError() override; void OnRenderError() override;
private:
// Handles stopping tts. // Handles stopping tts.
void StopLocked(bool clear_buffers = true) void StopLocked(bool clear_buffers = true)
EXCLUSIVE_LOCKS_REQUIRED(state_lock_); EXCLUSIVE_LOCKS_REQUIRED(state_lock_);
...@@ -114,6 +115,16 @@ class TtsService : public mojom::TtsService, ...@@ -114,6 +115,16 @@ class TtsService : public mojom::TtsService,
// The queue of audio buffers to be played by the audio thread. // The queue of audio buffers to be played by the audio thread.
std::deque<AudioBuffer> buffers_ GUARDED_BY(state_lock_); std::deque<AudioBuffer> buffers_ GUARDED_BY(state_lock_);
// An explicit list of timepoints, sorted by increasing time delta, to be fired
// while rendering audio at the specified |delay| from the start of audio
// playback. An AudioBuffer may contain an implicit timepoint for callers who
// specify a character index along with the audio buffer.
std::deque<std::pair<int, base::TimeDelta>> timepoints_
GUARDED_BY(state_lock_);
// The time at which playback of the current utterance started.
base::Time start_playback_time_;
}; };
} // namespace tts } // namespace tts
......
...@@ -21,9 +21,9 @@ namespace chromeos { ...@@ -21,9 +21,9 @@ namespace chromeos {
namespace tts { namespace tts {
using CreateOutputStreamCallback = using CreateOutputStreamCallback =
base::OnceCallback<void(::media::mojom::ReadWriteAudioDataPipePtr)>; base::OnceCallback<void(media::mojom::ReadWriteAudioDataPipePtr)>;
using CreateLoopbackStreamCallback = using CreateLoopbackStreamCallback =
base::OnceCallback<void(::media::mojom::ReadOnlyAudioDataPipePtr)>; base::OnceCallback<void(media::mojom::ReadOnlyAudioDataPipePtr)>;
class MockAudioStreamFactory : public audio::mojom::StreamFactory { class MockAudioStreamFactory : public audio::mojom::StreamFactory {
public: public:
...@@ -51,6 +51,7 @@ class MockAudioStreamFactory : public audio::mojom::StreamFactory { ...@@ -51,6 +51,7 @@ class MockAudioStreamFactory : public audio::mojom::StreamFactory {
const media::AudioParameters& params, const media::AudioParameters& params,
const base::UnguessableToken& group_id, const base::UnguessableToken& group_id,
CreateOutputStreamCallback callback) override { CreateOutputStreamCallback callback) override {
audio_output_stream_ = std::move(stream);
std::move(callback).Run(nullptr); std::move(callback).Run(nullptr);
} }
void BindMuter( void BindMuter(
...@@ -65,30 +66,70 @@ class MockAudioStreamFactory : public audio::mojom::StreamFactory { ...@@ -65,30 +66,70 @@ class MockAudioStreamFactory : public audio::mojom::StreamFactory {
uint32_t shared_memory_count, uint32_t shared_memory_count,
const base::UnguessableToken& group_id, const base::UnguessableToken& group_id,
CreateLoopbackStreamCallback callback) override {} CreateLoopbackStreamCallback callback) override {}
PendingReceiver<media::mojom::AudioOutputStream> audio_output_stream_;
};
// Test double for mojom::TtsEventObserver that simply records what it is told:
// it counts start and end notifications and stores every reported character
// index in arrival order. Error notifications are ignored.
class MockTtsEventObserver : public mojom::TtsEventObserver {
 public:
  // mojom::TtsEventObserver:
  void OnStart() override { ++start_count; }
  void OnTimepoint(int32_t char_index) override {
    char_indices.emplace_back(char_index);
  }
  void OnEnd() override { ++end_count; }
  void OnError() override {}

  // Number of OnStart() calls received so far.
  int start_count = 0;
  // Character indices passed to OnTimepoint(), oldest first.
  std::vector<int> char_indices;
  // Number of OnEnd() calls received so far.
  int end_count = 0;
};
class TtsServiceTest : public testing::Test { class TtsServiceTest : public testing::Test {
public: public:
TtsServiceTest() : service_(remote_service_.BindNewPipeAndPassReceiver()) {} TtsServiceTest()
: service_(remote_service_.BindNewPipeAndPassReceiver()),
audio_stream_factory_(&mock_audio_stream_factory_) {}
~TtsServiceTest() override = default; ~TtsServiceTest() override = default;
protected: protected:
void InitTtsStreamFactory( void InitTtsStreamFactory(
mojo::Remote<mojom::TtsStreamFactory>* tts_stream_factory) { mojo::Remote<mojom::TtsStreamFactory>* tts_stream_factory) {
mojo::Receiver<audio::mojom::StreamFactory> audio_stream_factory( // Audio stream factory is here to get a basic environment working only.
&mock_audio_stream_factory_); // Unbind and rebind if needed.
if (audio_stream_factory_.is_bound())
audio_stream_factory_.reset();
remote_service_->BindTtsStreamFactory( remote_service_->BindTtsStreamFactory(
tts_stream_factory->BindNewPipeAndPassReceiver(), tts_stream_factory->BindNewPipeAndPassReceiver(),
audio_stream_factory.BindNewPipeAndPassRemote()); audio_stream_factory_.BindNewPipeAndPassRemote());
remote_service_.FlushForTesting(); remote_service_.FlushForTesting();
EXPECT_TRUE(service_.tts_stream_factory_for_testing()->is_bound()); EXPECT_TRUE(service_.tts_stream_factory_for_testing()->is_bound());
EXPECT_TRUE(tts_stream_factory->is_connected()); EXPECT_TRUE(tts_stream_factory->is_connected());
} }
// Test helper that obtains a playback TTS stream from the service.
// Sets up a local TtsStreamFactory via InitTtsStreamFactory(), asks it to
// create a playback stream, and binds the pending remote handed to the
// creation callback into |playback_tts_stream| (an out-parameter owned by the
// caller).
void InitPlaybackTtsStream(
mojo::Remote<mojom::PlaybackTtsStream>* playback_tts_stream) {
mojo::Remote<mojom::TtsStreamFactory> tts_stream_factory;
InitTtsStreamFactory(&tts_stream_factory);
// The callback receives the new stream plus its sample rate and buffer size;
// only the stream is used here.
tts_stream_factory->CreatePlaybackTtsStream(base::BindOnce(
[](mojo::Remote<mojom::PlaybackTtsStream>* playback_tts_stream,
PendingRemote<mojom::PlaybackTtsStream> stream, int32_t sample_rate,
int32_t buffer_size) {
playback_tts_stream->Bind(std::move(stream));
},
playback_tts_stream));
// NOTE(review): presumably the flush ensures the creation callback has run
// (and |playback_tts_stream| is bound) before returning - confirm.
tts_stream_factory.FlushForTesting();
}
base::test::TaskEnvironment task_environment_; base::test::TaskEnvironment task_environment_;
mojo::Remote<mojom::TtsService> remote_service_; mojo::Remote<mojom::TtsService> remote_service_;
MockAudioStreamFactory mock_audio_stream_factory_;
TtsService service_; TtsService service_;
MockAudioStreamFactory mock_audio_stream_factory_;
mojo::Receiver<audio::mojom::StreamFactory> audio_stream_factory_;
}; };
TEST_F(TtsServiceTest, BindMultipleStreamFactories) { TEST_F(TtsServiceTest, BindMultipleStreamFactories) {
...@@ -185,5 +226,114 @@ TEST_F(TtsServiceTest, BindMultipleStreamFactoriesCreateInterleaved) { ...@@ -185,5 +226,114 @@ TEST_F(TtsServiceTest, BindMultipleStreamFactoriesCreateInterleaved) {
EXPECT_FALSE(tts_stream_factory2.is_connected()); EXPECT_FALSE(tts_stream_factory2.is_connected());
} }
// Checks the observer events produced by buffers that carry an implicit
// timepoint (a char index attached to the audio buffer): a single start event
// after the first render, the buffer's char index reported as a timepoint, and
// an end event for the last buffer without an additional timepoint.
TEST_F(TtsServiceTest, BasicAudioBuffering) {
mojo::Remote<mojom::PlaybackTtsStream> playback_tts_stream;
InitPlaybackTtsStream(&playback_tts_stream);
MockTtsEventObserver backing_observer;
mojo::Receiver<mojom::TtsEventObserver> observer(&backing_observer);
// Start playback; the callback binds the recording observer to the pending
// receiver it is given.
playback_tts_stream->Play(base::BindOnce(
[](mojo::Receiver<mojom::TtsEventObserver>* receiver,
mojo::PendingReceiver<mojom::TtsEventObserver> pending_receiver) {
receiver->Bind(std::move(pending_receiver));
},
&observer));
playback_tts_stream.FlushForTesting();
// Render() is invoked directly on the service with a mono, 512-frame bus.
auto bus = media::AudioBus::Create(1 /* channels */, 512 /* frames */);
service_.Render(base::TimeDelta::FromSeconds(0), base::TimeTicks::Now(),
0 /* prior frames skipped */, bus.get());
observer.FlushForTesting();
// The playback stream pushes an empty buffer to trigger a start event.
EXPECT_EQ(1, backing_observer.start_count);
EXPECT_TRUE(backing_observer.char_indices.empty());
EXPECT_EQ(0, backing_observer.end_count);
// A non-final buffer with char index 100 should surface as one timepoint.
playback_tts_stream->SendAudioBuffer(
std::vector<float>(), 100 /* char_index */, false /* last buffer */);
playback_tts_stream.FlushForTesting();
service_.Render(base::TimeDelta::FromSeconds(0), base::TimeTicks::Now(),
0 /* prior frames skipped */, bus.get());
observer.FlushForTesting();
EXPECT_EQ(1, backing_observer.start_count);
EXPECT_EQ(1U, backing_observer.char_indices.size());
EXPECT_EQ(100, backing_observer.char_indices[0]);
EXPECT_EQ(0, backing_observer.end_count);
// Note that the char index is ignored for the end of all audio as it's
// assumed to be the length of the utterance.
playback_tts_stream->SendAudioBuffer(
std::vector<float>(), 9999 /* char_index */, true /* last buffer */);
playback_tts_stream.FlushForTesting();
service_.Render(base::TimeDelta::FromSeconds(0), base::TimeTicks::Now(),
0 /* prior frames skipped */, bus.get());
observer.FlushForTesting();
// Only the end count changes: no new start and no timepoint for 9999.
EXPECT_EQ(1, backing_observer.start_count);
EXPECT_EQ(1U, backing_observer.char_indices.size());
EXPECT_EQ(1, backing_observer.end_count);
}
// Checks that timepoints queued through TtsService::AddExplicitTimepoint() are
// reported to the observer in insertion order on the next render, and that
// buffers sent with char index -1 produce no timepoint of their own.
TEST_F(TtsServiceTest, ExplicitAudioTimepointing) {
mojo::Remote<mojom::PlaybackTtsStream> playback_tts_stream;
InitPlaybackTtsStream(&playback_tts_stream);
MockTtsEventObserver backing_observer;
mojo::Receiver<mojom::TtsEventObserver> observer(&backing_observer);
// Start playback; the callback binds the recording observer to the pending
// receiver it is given.
playback_tts_stream->Play(base::BindOnce(
[](mojo::Receiver<mojom::TtsEventObserver>* receiver,
mojo::PendingReceiver<mojom::TtsEventObserver> pending_receiver) {
receiver->Bind(std::move(pending_receiver));
},
&observer));
playback_tts_stream.FlushForTesting();
// Render() is invoked directly on the service with a mono, 512-frame bus.
auto bus = media::AudioBus::Create(1 /* channels */, 512 /* frames */);
service_.Render(base::TimeDelta::FromSeconds(0), base::TimeTicks::Now(),
0 /* prior frames skipped */, bus.get());
observer.FlushForTesting();
// The playback stream pushes an empty buffer to trigger a start event.
EXPECT_EQ(1, backing_observer.start_count);
EXPECT_TRUE(backing_observer.char_indices.empty());
EXPECT_EQ(0, backing_observer.end_count);
// A buffer with char index -1 carries no implicit timepoint.
playback_tts_stream->SendAudioBuffer(
std::vector<float>(), -1 /* char_index */, false /* last buffer */);
playback_tts_stream.FlushForTesting();
service_.Render(base::TimeDelta::FromSeconds(0), base::TimeTicks::Now(),
0 /* prior frames skipped */, bus.get());
observer.FlushForTesting();
EXPECT_EQ(1, backing_observer.start_count);
EXPECT_TRUE(backing_observer.char_indices.empty());
EXPECT_EQ(0, backing_observer.end_count);
// Queue three explicit timepoints, all with zero delay, alongside another
// buffer that has no implicit timepoint.
playback_tts_stream->SendAudioBuffer(
std::vector<float>(), -1 /* char_index */, false /* last buffer */);
service_.AddExplicitTimepoint(100, base::TimeDelta::FromSeconds(0));
service_.AddExplicitTimepoint(200, base::TimeDelta::FromSeconds(0));
service_.AddExplicitTimepoint(300, base::TimeDelta::FromSeconds(0));
playback_tts_stream.FlushForTesting();
service_.Render(base::TimeDelta::FromSeconds(0), base::TimeTicks::Now(),
0 /* prior frames skipped */, bus.get());
observer.FlushForTesting();
// All three are expected after a single render, in the order they were added.
EXPECT_EQ(1, backing_observer.start_count);
EXPECT_EQ(3U, backing_observer.char_indices.size());
EXPECT_EQ(100, backing_observer.char_indices[0]);
EXPECT_EQ(200, backing_observer.char_indices[1]);
EXPECT_EQ(300, backing_observer.char_indices[2]);
EXPECT_EQ(0, backing_observer.end_count);
// The final buffer ends the utterance; its char index (9999) must not be
// reported as a timepoint.
playback_tts_stream->SendAudioBuffer(
std::vector<float>(), 9999 /* char_index */, true /* last buffer */);
playback_tts_stream.FlushForTesting();
service_.Render(base::TimeDelta::FromSeconds(0), base::TimeTicks::Now(),
0 /* prior frames skipped */, bus.get());
observer.FlushForTesting();
EXPECT_EQ(1, backing_observer.start_count);
EXPECT_EQ(3U, backing_observer.char_indices.size());
EXPECT_EQ(1, backing_observer.end_count);
}
} // namespace tts } // namespace tts
} // namespace chromeos } // namespace chromeos
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment