Commit 5c7ae321 authored by David Tseng, committed by Commit Bot

Introduce a queue of audio buffers for tts audio playback

R=dmazzoni@chromium.org

Test: run through all Tts Debug tests; manually using Eve/Hana; word callbacks; continuous read on large blocks; trigger lots of interruptions; start/end callback testing.

Change-Id: I7ebc43620adbd5adc9de23ead8af12bde9f4bf14
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2465564
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Commit-Queue: David Tseng <dtseng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#816194}
parent 7e150eef
......@@ -41,9 +41,7 @@ void HandleLibraryLogging(int severity, const char* message) {
// methods utilize C features only.
TtsService::TtsService(mojo::PendingReceiver<mojom::TtsService> receiver)
: service_receiver_(this, std::move(receiver)),
stream_receiver_(this),
got_first_buffer_(false) {
: service_receiver_(this, std::move(receiver)), stream_receiver_(this) {
if (setpriority(PRIO_PROCESS, 0, -10 /* real time audio */) != 0) {
PLOG(ERROR) << "Unable to request real time priority; performance will be "
"impacted.";
......@@ -62,7 +60,6 @@ TtsService::~TtsService() = default;
void TtsService::BindTtsStream(
mojo::PendingReceiver<mojom::TtsStream> receiver,
mojo::PendingRemote<audio::mojom::StreamFactory> factory) {
base::AutoLock al(state_lock_);
stream_receiver_.Bind(std::move(receiver));
// TODO(accessibility): The sample rate below can change based on the audio
......@@ -79,8 +76,6 @@ void TtsService::BindTtsStream(
void TtsService::InstallVoice(const std::string& voice_name,
const std::vector<uint8_t>& voice_bytes,
InstallVoiceCallback callback) {
base::AutoLock al(state_lock_);
// Create a directory to place extracted voice data.
base::FilePath voice_data_path(kTempDataDirectory);
voice_data_path = voice_data_path.Append(voice_name);
......@@ -101,8 +96,6 @@ void TtsService::InstallVoice(const std::string& voice_name,
void TtsService::SelectVoice(const std::string& voice_name,
SelectVoiceCallback callback) {
base::AutoLock al(state_lock_);
base::FilePath path_prefix =
base::FilePath(kTempDataDirectory).Append(voice_name);
base::FilePath pipeline_path = path_prefix.Append("pipeline");
......@@ -112,8 +105,6 @@ void TtsService::SelectVoice(const std::string& voice_name,
void TtsService::Speak(const std::vector<uint8_t>& text_jspb,
SpeakCallback callback) {
base::AutoLock al(state_lock_);
tts_event_observer_.reset();
auto pending_receiver = tts_event_observer_.BindNewPipeAndPassReceiver();
std::move(callback).Run(std::move(pending_receiver));
......@@ -125,16 +116,13 @@ void TtsService::Speak(const std::vector<uint8_t>& text_jspb,
return;
}
// For lower end devices, pre-fetching the first buffer on the main thread is
// important. Not doing so can cause us to not respond quickly enough in the
// audio rendering thread/callback below.
size_t frames = 0;
first_buf_.first.clear();
first_buf_.first.resize(libchrometts_.GoogleTtsGetFramesInAudioBuffer());
first_buf_.second =
libchrometts_.GoogleTtsReadBuffered(&first_buf_.first[0], &frames);
output_device_->Play();
is_playing_ = true;
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
true /* is_first_buffer */));
}
void TtsService::Stop() {
......@@ -143,7 +131,6 @@ void TtsService::Stop() {
}
void TtsService::SetVolume(float volume) {
base::AutoLock al(state_lock_);
output_device_->SetVolume(volume);
}
......@@ -151,69 +138,104 @@ int TtsService::Render(base::TimeDelta delay,
base::TimeTicks delay_timestamp,
int prior_frames_skipped,
media::AudioBus* dest) {
// Careful to not block the render callback. Only try to acquire the lock
// here, but early return if we are processing a series of other calls. This
// can be extremely important if there's a long queue of pending Speak/Stop
// pairs being processed on the main thread. This can occur if the tts api
// receives lots of tts requests.
if (!state_lock_.Try())
return 0;
size_t frames = 0;
float* channel = dest->channel(0);
size_t frames_in_buf = 0;
int32_t status = -1;
if (got_first_buffer_) {
status = libchrometts_.GoogleTtsReadBuffered(channel, &frames);
} else {
status = first_buf_.second;
float* buf = &first_buf_.first[0];
frames = first_buf_.first.size();
for (size_t i = 0; i < frames; i++)
channel[i] = buf[i];
}
if (status <= 0) {
// -1 means an error, 0 means done.
if (status == -1)
tts_event_observer_->OnError();
dest->Zero();
StopLocked();
state_lock_.Release();
return 0;
}
if (frames == 0) {
state_lock_.Release();
return 0;
int char_index = -1;
bool is_first_buffer = false;
{
base::AutoLock al(state_lock_);
if (buffers_.empty())
return 0;
const AudioBuffer& buf = buffers_.front();
status = buf.status;
// Done, 0, or error, -1.
if (status <= 0) {
if (status == -1)
tts_event_observer_->OnError();
else
tts_event_observer_->OnEnd();
StopLocked();
return 0;
}
char_index = buf.char_index;
is_first_buffer = buf.is_first_buffer;
const float* frames = &buf.frames[0];
frames_in_buf = buf.frames.size();
float* channel = dest->channel(0);
for (size_t i = 0; i < frames_in_buf; i++)
channel[i] = frames[i];
buffers_.pop_front();
}
if (!got_first_buffer_) {
got_first_buffer_ = true;
if (is_first_buffer)
tts_event_observer_->OnStart();
}
// There's only really ever one timepoint since we play this buffer in one
// chunk.
int char_index = -1;
if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
if (frames_in_buf == 0)
return 0;
if (char_index != -1)
tts_event_observer_->OnTimepoint(char_index);
state_lock_.Release();
return frames;
return frames_in_buf;
}
// Render-error callback. The body is empty, so render errors are currently
// ignored.
void TtsService::OnRenderError() {}
void TtsService::StopLocked() {
if (!is_playing_)
return;
output_device_->Pause();
libchrometts_.GoogleTtsFinalizeBuffered();
if (tts_event_observer_ && got_first_buffer_)
tts_event_observer_->OnEnd();
got_first_buffer_ = false;
buffers_.clear();
is_playing_ = false;
}
// Reads one chunk of audio from the tts engine and appends it to |buffers_|,
// where Render() picks it up. Re-posts itself to the current task runner until
// the engine reports a status <= 0 (0 means done, -1 means error) or playback
// is no longer active. The terminal (status <= 0) buffer is still queued so
// that Render() can observe the end/error condition.
//
// |is_first_buffer| is stored on the queued buffer; Render() uses it to decide
// when to fire OnStart().
void TtsService::ReadMoreFrames(bool is_first_buffer) {
  if (!is_playing_)
    return;

  AudioBuffer buf;
  buf.frames.resize(libchrometts_.GoogleTtsGetFramesInAudioBuffer());

  // Use data() rather than &frames[0]: indexing an empty vector is undefined
  // behaviour, and nothing here guarantees a non-zero buffer size.
  size_t frames_in_buf = 0;
  buf.status =
      libchrometts_.GoogleTtsReadBuffered(buf.frames.data(), &frames_in_buf);
  buf.frames.resize(frames_in_buf);

  // A char index of -1 means this buffer carries no timepoint.
  buf.char_index = -1;
  if (libchrometts_.GoogleTtsGetTimepointsCount() > 0)
    buf.char_index = libchrometts_.GoogleTtsGetTimepointsCharIndexAtIndex(0);
  buf.is_first_buffer = is_first_buffer;

  // Capture the status before |buf| is moved into the queue so that nothing
  // below reads from a moved-from object.
  const int status = buf.status;

  {
    base::AutoLock al(state_lock_);
    buffers_.emplace_back(std::move(buf));
  }

  // Done or error: stop scheduling further reads.
  if (status <= 0)
    return;

  base::ThreadTaskRunnerHandle::Get()->PostTask(
      FROM_HERE,
      base::BindOnce(&TtsService::ReadMoreFrames, base::Unretained(this),
                     false /* is_first_buffer */));
}
// Out-of-line special members for AudioBuffer.
TtsService::AudioBuffer::AudioBuffer() = default;

TtsService::AudioBuffer::~AudioBuffer() = default;

// Move construction: takes over |other|'s sample storage and copies the
// scalar fields.
TtsService::AudioBuffer::AudioBuffer(TtsService::AudioBuffer&& other)
    : frames(std::move(other.frames)),
      char_index(other.char_index),
      status(other.status),
      is_first_buffer(other.is_first_buffer) {}
} // namespace tts
......
......@@ -54,6 +54,8 @@ class TtsService : public mojom::TtsService,
// Handles stopping tts.
void StopLocked() EXCLUSIVE_LOCKS_REQUIRED(state_lock_);
void ReadMoreFrames(bool is_first_buffer);
// Connection to tts in the browser.
mojo::Receiver<mojom::TtsService> service_receiver_;
......@@ -61,26 +63,35 @@ class TtsService : public mojom::TtsService,
base::Lock state_lock_;
// Prebuilt.
LibChromeTtsLoader libchrometts_ GUARDED_BY(state_lock_);
LibChromeTtsLoader libchrometts_;
// Connection to tts in the component extension.
mojo::Receiver<mojom::TtsStream> stream_receiver_ GUARDED_BY(state_lock_);
mojo::Receiver<mojom::TtsStream> stream_receiver_;
// Connection to send tts events to component extension.
mojo::Remote<mojom::TtsEventObserver> tts_event_observer_
GUARDED_BY(state_lock_);
mojo::Remote<mojom::TtsEventObserver> tts_event_observer_;
// Outputs speech synthesis to audio.
std::unique_ptr<audio::OutputDevice> output_device_ GUARDED_BY(state_lock_);
// Tracks whether any audio data came as a result of |Speak|. Reset for every
// call to |Speak|.
bool got_first_buffer_ GUARDED_BY(state_lock_);
// The first buffer; used for prefetching/warming up the engine for a new
// utterance. The first item is the audio data, the second is the status
// returned by a call to GoogleTtsReadBuffered.
std::pair<std::vector<float>, size_t> first_buf_;
std::unique_ptr<audio::OutputDevice> output_device_;
// Helper group of state to pass from main thread to audio thread.
struct AudioBuffer {
  AudioBuffer();
  ~AudioBuffer();
  AudioBuffer(const AudioBuffer& other) = delete;
  AudioBuffer(AudioBuffer&& other);

  // Audio samples read from the tts engine for this chunk.
  std::vector<float> frames;

  // Character index of the timepoint associated with this chunk, or -1 when
  // there is none.
  int char_index = -1;

  // Status returned by the engine read that produced this chunk: -1 means
  // error, 0 means done, anything greater means playable audio. Defaults to
  // -1 so a buffer that was never filled is not mistaken for valid audio.
  int status = -1;

  // True only for the first chunk of an utterance.
  bool is_first_buffer = false;
};
// The queue of audio buffers to be played by the audio thread.
std::deque<AudioBuffer> buffers_ GUARDED_BY(state_lock_);
// Tracks whether the output device is playing audio.
bool is_playing_ = false;
};
} // namespace tts
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment