Commit 5fe33cf0, authored by David Tseng, committed by Chromium LUCI CQ

Define text_to_speech watchlist

R=dmazzoni@chromium.org

Change-Id: Ic8c51de3c4ac96cdcc496842a9a3698a976778e6
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2577543
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Commit-Queue: Dominic Mazzoni <dmazzoni@chromium.org>
Commit-Queue: David Tseng <dtseng@chromium.org>
Auto-Submit: David Tseng <dtseng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#834339}
parent 87d9c99c
......@@ -147,13 +147,20 @@ void GoogleTtsStream::ReadMoreFrames(bool is_first_buffer) {
buf.frames.resize(frames_in_buf);
buf.char_index = -1;
if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
buf.char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
buf.is_first_buffer = is_first_buffer;
owner_->AddAudioBuffer(std::move(buf));
for (size_t timepoint_index = 0;
timepoint_index < libchrometts_.GoogleTtsGetTimepointsCount();
timepoint_index++) {
owner_->AddExplicitTimepoint(
libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(timepoint_index),
base::TimeDelta::FromSecondsD(
libchrometts_.GoogleTtsGetTimepointsTimeInSecsAtIndex(
timepoint_index)));
}
if (status <= 0)
return;
......
......@@ -87,6 +87,11 @@ void TtsService::AddAudioBuffer(AudioBuffer buf) {
buffers_.emplace_back(std::move(buf));
}
// Appends an explicit timepoint, pairing |char_index| with |delay|, to the back
// of |timepoints_|. |state_lock_| is held for the duration of the append.
void TtsService::AddExplicitTimepoint(int char_index, base::TimeDelta delay) {
  base::AutoLock scoped_state_lock(state_lock_);
  timepoints_.emplace_back(char_index, delay);
}
void TtsService::Stop() {
base::AutoLock al(state_lock_);
StopLocked();
......@@ -118,10 +123,9 @@ int TtsService::Render(base::TimeDelta delay,
media::AudioBus* dest) {
size_t frames_in_buf = 0;
int32_t status = -1;
int char_index = -1;
bool is_first_buffer = false;
{
base::AutoLock al(state_lock_);
if (buffers_.empty())
return 0;
......@@ -139,25 +143,32 @@ int TtsService::Render(base::TimeDelta delay,
return 0;
}
char_index = buf.char_index;
is_first_buffer = buf.is_first_buffer;
const float* frames = &buf.frames[0];
if (buf.is_first_buffer) {
start_playback_time_ = base::Time::Now();
tts_event_observer_->OnStart();
}
// Implied timepoint.
if (buf.char_index != -1)
tts_event_observer_->OnTimepoint(buf.char_index);
// Explicit timepoint(s).
base::TimeDelta start_to_now = base::Time::Now() - start_playback_time_;
while (!timepoints_.empty() && timepoints_.front().second <= start_to_now) {
tts_event_observer_->OnTimepoint(timepoints_.front().first);
timepoints_.pop_front();
}
frames_in_buf = buf.frames.size();
const float* frames = nullptr;
if (!buf.frames.empty())
frames = &buf.frames[0];
float* channel = dest->channel(0);
for (size_t i = 0; i < frames_in_buf; i++)
channel[i] = frames[i];
buffers_.pop_front();
}
if (is_first_buffer)
tts_event_observer_->OnStart();
if (frames_in_buf == 0)
return 0;
if (char_index != -1)
tts_event_observer_->OnTimepoint(char_index);
return frames_in_buf;
}
......@@ -165,8 +176,10 @@ void TtsService::OnRenderError() {}
// Pauses the audio output device. When |clear_buffers| is true, also drops all
// queued audio buffers and any pending explicit timepoints so that nothing
// stale is fired or played after the stop.
//
// The declaration is annotated EXCLUSIVE_LOCKS_REQUIRED(state_lock_), so the
// caller must already hold |state_lock_|.
void TtsService::StopLocked(bool clear_buffers) {
  output_device_->Pause();
  if (clear_buffers) {
    buffers_.clear();
    timepoints_.clear();
  }
}
void TtsService::ProcessPendingTtsStreamFactories() {
......
......@@ -47,6 +47,7 @@ class TtsService : public mojom::TtsService,
base::OnceCallback<void(::mojo::PendingReceiver<mojom::TtsEventObserver>)>
callback);
void AddAudioBuffer(AudioBuffer buf);
void AddExplicitTimepoint(int char_index, base::TimeDelta delay);
void Stop();
void SetVolume(float volume);
void Pause();
......@@ -64,7 +65,6 @@ class TtsService : public mojom::TtsService,
return pending_tts_stream_factory_receivers_;
}
private:
// mojom::TtsService:
void BindTtsStreamFactory(
mojo::PendingReceiver<mojom::TtsStreamFactory> receiver,
......@@ -82,6 +82,7 @@ class TtsService : public mojom::TtsService,
media::AudioBus* dest) override;
void OnRenderError() override;
private:
// Handles stopping tts.
void StopLocked(bool clear_buffers = true)
EXCLUSIVE_LOCKS_REQUIRED(state_lock_);
......@@ -114,6 +115,16 @@ class TtsService : public mojom::TtsService,
// The queue of audio buffers to be played by the audio thread.
std::deque<AudioBuffer> buffers_ GUARDED_BY(state_lock_);
// An explicit list of timepoints, sorted by increasing time delta, to be fired
// while rendering audio at the specified |delay| from the start of audio
// playback. An AudioBuffer may also carry an implicit timepoint for callers
// who specify a character index along with the audio buffer.
std::deque<std::pair<int, base::TimeDelta>> timepoints_
GUARDED_BY(state_lock_);
// The time at which playback of the current utterance started.
base::Time start_playback_time_;
};
} // namespace tts
......
......@@ -21,9 +21,9 @@ namespace chromeos {
namespace tts {
// Callback aliases used by the MockAudioStreamFactory overrides in this file.
using CreateOutputStreamCallback =
    base::OnceCallback<void(media::mojom::ReadWriteAudioDataPipePtr)>;
using CreateLoopbackStreamCallback =
    base::OnceCallback<void(media::mojom::ReadOnlyAudioDataPipePtr)>;
class MockAudioStreamFactory : public audio::mojom::StreamFactory {
public:
......@@ -51,6 +51,7 @@ class MockAudioStreamFactory : public audio::mojom::StreamFactory {
const media::AudioParameters& params,
const base::UnguessableToken& group_id,
CreateOutputStreamCallback callback) override {
audio_output_stream_ = std::move(stream);
std::move(callback).Run(nullptr);
}
void BindMuter(
......@@ -65,30 +66,70 @@ class MockAudioStreamFactory : public audio::mojom::StreamFactory {
uint32_t shared_memory_count,
const base::UnguessableToken& group_id,
CreateLoopbackStreamCallback callback) override {}
PendingReceiver<media::mojom::AudioOutputStream> audio_output_stream_;
};
// Test double for mojom::TtsEventObserver. It records every notification it
// receives in public fields so that tests can assert on them directly.
class MockTtsEventObserver : public mojom::TtsEventObserver {
 public:
  // mojom::TtsEventObserver:
  void OnStart() override { ++start_count; }

  void OnTimepoint(int32_t char_index) override {
    char_indices.push_back(char_index);
  }

  void OnEnd() override { ++end_count; }

  // Errors are not tracked by this mock.
  void OnError() override {}

  // Number of OnStart() notifications seen so far.
  int start_count = 0;

  // Character indices passed to OnTimepoint(), in arrival order.
  std::vector<int> char_indices;

  // Number of OnEnd() notifications seen so far.
  int end_count = 0;
};
// Test fixture that owns a TtsService bound to |remote_service_|, plus a mock
// audio stream factory and the receiver that exposes it over mojo.
class TtsServiceTest : public testing::Test {
 public:
  TtsServiceTest()
      : service_(remote_service_.BindNewPipeAndPassReceiver()),
        audio_stream_factory_(&mock_audio_stream_factory_) {}
  ~TtsServiceTest() override = default;

 protected:
  // Binds |tts_stream_factory| to the service through |remote_service_| and
  // expects the resulting connection to be live on both ends.
  void InitTtsStreamFactory(
      mojo::Remote<mojom::TtsStreamFactory>* tts_stream_factory) {
    // Audio stream factory is here to get a basic environment working only.
    // Unbind and rebind if needed.
    if (audio_stream_factory_.is_bound())
      audio_stream_factory_.reset();

    remote_service_->BindTtsStreamFactory(
        tts_stream_factory->BindNewPipeAndPassReceiver(),
        audio_stream_factory_.BindNewPipeAndPassRemote());
    remote_service_.FlushForTesting();

    EXPECT_TRUE(service_.tts_stream_factory_for_testing()->is_bound());
    EXPECT_TRUE(tts_stream_factory->is_connected());
  }

  // Creates a stream factory, asks it for a playback stream, and binds the
  // returned pending remote into |playback_tts_stream|. The sample rate and
  // buffer size reported by the factory are ignored.
  void InitPlaybackTtsStream(
      mojo::Remote<mojom::PlaybackTtsStream>* playback_tts_stream) {
    mojo::Remote<mojom::TtsStreamFactory> tts_stream_factory;
    InitTtsStreamFactory(&tts_stream_factory);

    tts_stream_factory->CreatePlaybackTtsStream(base::BindOnce(
        [](mojo::Remote<mojom::PlaybackTtsStream>* playback_tts_stream,
           PendingRemote<mojom::PlaybackTtsStream> stream, int32_t sample_rate,
           int32_t buffer_size) {
          playback_tts_stream->Bind(std::move(stream));
        },
        playback_tts_stream));
    tts_stream_factory.FlushForTesting();
  }

  // Declaration order matters: members are initialized top to bottom, and
  // |service_| consumes |remote_service_| while |audio_stream_factory_| takes
  // the address of |mock_audio_stream_factory_|.
  base::test::TaskEnvironment task_environment_;
  mojo::Remote<mojom::TtsService> remote_service_;
  TtsService service_;
  MockAudioStreamFactory mock_audio_stream_factory_;
  mojo::Receiver<audio::mojom::StreamFactory> audio_stream_factory_;
};
TEST_F(TtsServiceTest, BindMultipleStreamFactories) {
......@@ -185,5 +226,114 @@ TEST_F(TtsServiceTest, BindMultipleStreamFactoriesCreateInterleaved) {
EXPECT_FALSE(tts_stream_factory2.is_connected());
}
// Sends audio buffers over a playback stream and checks which start,
// timepoint and end notifications reach the observer after each render.
TEST_F(TtsServiceTest, BasicAudioBuffering) {
  mojo::Remote<mojom::PlaybackTtsStream> stream;
  InitPlaybackTtsStream(&stream);

  MockTtsEventObserver events;
  mojo::Receiver<mojom::TtsEventObserver> observer_receiver(&events);
  stream->Play(base::BindOnce(
      [](mojo::Receiver<mojom::TtsEventObserver>* receiver,
         mojo::PendingReceiver<mojom::TtsEventObserver> pending_receiver) {
        receiver->Bind(std::move(pending_receiver));
      },
      &observer_receiver));
  stream.FlushForTesting();

  auto audio_bus = media::AudioBus::Create(1 /* channels */, 512 /* frames */);

  // Renders once into |audio_bus|, then flushes the observer receiver so any
  // notifications produced by the render are visible in |events|.
  const auto render_and_flush = [&]() {
    service_.Render(base::TimeDelta::FromSeconds(0), base::TimeTicks::Now(),
                    0 /* prior frames skipped */, audio_bus.get());
    observer_receiver.FlushForTesting();
  };

  render_and_flush();

  // The playback stream pushes an empty buffer to trigger a start event.
  EXPECT_EQ(1, events.start_count);
  EXPECT_TRUE(events.char_indices.empty());
  EXPECT_EQ(0, events.end_count);

  // A buffer carrying a char index yields exactly one timepoint.
  stream->SendAudioBuffer(std::vector<float>(), 100 /* char_index */,
                          false /* last buffer */);
  stream.FlushForTesting();
  render_and_flush();

  EXPECT_EQ(1, events.start_count);
  EXPECT_EQ(1U, events.char_indices.size());
  EXPECT_EQ(100, events.char_indices[0]);
  EXPECT_EQ(0, events.end_count);

  // Note that the char index is ignored for the end of all audio as it's
  // assumed to be the length of the utterance.
  stream->SendAudioBuffer(std::vector<float>(), 9999 /* char_index */,
                          true /* last buffer */);
  stream.FlushForTesting();
  render_and_flush();

  EXPECT_EQ(1, events.start_count);
  EXPECT_EQ(1U, events.char_indices.size());
  EXPECT_EQ(1, events.end_count);
}
// Registers explicit timepoints directly on the service and checks that they
// are reported to the observer, in order, on the next render.
TEST_F(TtsServiceTest, ExplicitAudioTimepointing) {
  mojo::Remote<mojom::PlaybackTtsStream> stream;
  InitPlaybackTtsStream(&stream);

  MockTtsEventObserver events;
  mojo::Receiver<mojom::TtsEventObserver> observer_receiver(&events);
  stream->Play(base::BindOnce(
      [](mojo::Receiver<mojom::TtsEventObserver>* receiver,
         mojo::PendingReceiver<mojom::TtsEventObserver> pending_receiver) {
        receiver->Bind(std::move(pending_receiver));
      },
      &observer_receiver));
  stream.FlushForTesting();

  auto audio_bus = media::AudioBus::Create(1 /* channels */, 512 /* frames */);

  // Renders once into |audio_bus|, then flushes the observer receiver so any
  // notifications produced by the render are visible in |events|.
  const auto render_and_flush = [&]() {
    service_.Render(base::TimeDelta::FromSeconds(0), base::TimeTicks::Now(),
                    0 /* prior frames skipped */, audio_bus.get());
    observer_receiver.FlushForTesting();
  };

  render_and_flush();

  // The playback stream pushes an empty buffer to trigger a start event.
  EXPECT_EQ(1, events.start_count);
  EXPECT_TRUE(events.char_indices.empty());
  EXPECT_EQ(0, events.end_count);

  // A buffer with char index -1 produces no timepoint.
  stream->SendAudioBuffer(std::vector<float>(), -1 /* char_index */,
                          false /* last buffer */);
  stream.FlushForTesting();
  render_and_flush();

  EXPECT_EQ(1, events.start_count);
  EXPECT_TRUE(events.char_indices.empty());
  EXPECT_EQ(0, events.end_count);

  // Three explicit timepoints, all with a zero delay, are queued alongside a
  // buffer that has no char index of its own.
  stream->SendAudioBuffer(std::vector<float>(), -1 /* char_index */,
                          false /* last buffer */);
  service_.AddExplicitTimepoint(100, base::TimeDelta::FromSeconds(0));
  service_.AddExplicitTimepoint(200, base::TimeDelta::FromSeconds(0));
  service_.AddExplicitTimepoint(300, base::TimeDelta::FromSeconds(0));
  stream.FlushForTesting();
  render_and_flush();

  EXPECT_EQ(1, events.start_count);
  EXPECT_EQ(3U, events.char_indices.size());
  EXPECT_EQ(100, events.char_indices[0]);
  EXPECT_EQ(200, events.char_indices[1]);
  EXPECT_EQ(300, events.char_indices[2]);
  EXPECT_EQ(0, events.end_count);

  // The last buffer ends the utterance without adding another timepoint.
  stream->SendAudioBuffer(std::vector<float>(), 9999 /* char_index */,
                          true /* last buffer */);
  stream.FlushForTesting();
  render_and_flush();

  EXPECT_EQ(1, events.start_count);
  EXPECT_EQ(3U, events.char_indices.size());
  EXPECT_EQ(1, events.end_count);
}
} // namespace tts
} // namespace chromeos
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment