Commit b9345ef3 authored by Raymond Toy, committed by Commit Bot

Don't allocate on audio thread and align work arrays

In WaveShaperCurveValues(), there are 5 work arrays that used to be
allocated on the audio thread.  Move these to the class so we don't do
that anymore.  And while we're at it, make them AudioFloatArrays so
they're appropriately aligned for best SIMD/NEON performance.

Running Spotify's Web Audio Benchmark shows that we get 10% improvement
in speed for WaveShaper-1x, and about 3-4% for WaveShaper-2x and 4x.

No functional changes; all tests pass.

Bug: 1013118
Change-Id: Ide6340ee79d567068e0661abd0b20f14bb064166
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2210821
Commit-Queue: Raymond Toy <rtoy@chromium.org>
Reviewed-by: Hongchan Choi <hongchan@chromium.org>
Cr-Commit-Position: refs/heads/master@{#771062}
parent 547e9589
......@@ -36,7 +36,14 @@
namespace blink {
WaveShaperDSPKernel::WaveShaperDSPKernel(WaveShaperProcessor* processor)
: AudioDSPKernel(processor), tail_time_(0) {
: AudioDSPKernel(processor),
tail_time_(0),
// 4 times render size to handle 4x oversampling.
virtual_index_(4 * audio_utilities::kRenderQuantumFrames),
index_(4 * audio_utilities::kRenderQuantumFrames),
v1_(4 * audio_utilities::kRenderQuantumFrames),
v2_(4 * audio_utilities::kRenderQuantumFrames),
f_(4 * audio_utilities::kRenderQuantumFrames) {
if (processor->Oversample() != WaveShaperProcessor::kOverSampleNone)
LazyInitializeOversampling();
}
......@@ -114,8 +121,9 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
uint32_t frames_to_process,
const float* curve_data,
int curve_length) const {
DCHECK_LE(frames_to_process, virtual_index_.size());
// Index into the array computed from the source value.
float virtual_index[frames_to_process];
float* virtual_index = virtual_index_.Data();
// virtual_index[k] =
// clampTo(0.5 * (source[k] + 1) * (curve_length - 1),
......@@ -134,16 +142,20 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
frames_to_process);
// index = floor(virtual_index)
float index[frames_to_process];
DCHECK_LE(frames_to_process, index_.size());
float* index = index_.Data();
// v1 and v2 hold the curve_data corresponding to the closest curve
// values to the source sample. To save memory, v1 will use the
// destination array.
float* v1 = destination;
float v2[frames_to_process];
DCHECK_LE(frames_to_process, v1_.size());
DCHECK_LE(frames_to_process, v2_.size());
float* v1 = v1_.Data();
float* v2 = v2_.Data();
// Interpolation factor: virtual_index - index.
float f[frames_to_process];
DCHECK_LE(frames_to_process, f_.size());
float* f = f_.Data();
int max_index = curve_length - 1;
unsigned k = 0;
......@@ -216,9 +228,10 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
int32x4_t index2 = vaddq_s32(index1, one);
index2 = vmaxq_s32(vminq_s32(index2, max), zero);
// Save index1/2 so we can get the individual parts.
int32_t i1[4];
int32_t i2[4];
// Save index1/2 so we can get the individual parts. Aligned to
// 16 bytes for vst1q instruction.
int32_t i1[4] __attribute__((aligned(16)));
int32_t i2[4] __attribute__((aligned(16)));
vst1q_s32(i1, index1);
vst1q_s32(i2, index2);
......@@ -257,7 +270,7 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
// = v1[k] + f[k]*(v2[k] - v1[k])
vector_math::Vsub(v2, 1, v1, 1, v2, 1, frames_to_process);
vector_math::Vmul(f, 1, v2, 1, v2, 1, frames_to_process);
vector_math::Vadd(v2, 1, destination, 1, destination, 1, frames_to_process);
vector_math::Vadd(v2, 1, v1, 1, destination, 1, frames_to_process);
}
void WaveShaperDSPKernel::ProcessCurve(const float* source,
......
......@@ -104,6 +104,16 @@ class WaveShaperDSPKernel final : public AudioDSPKernel {
// has an infinite tail so that silent input continues to produce non-silent
// output.
double tail_time_;
// Work arrays needed by WaveShaperCurveValues(). Mutable so this
// const function can modify these arrays. There's no state or
// anything kept here. See WaveShaperCurveValues() for details on
// what these hold.
mutable AudioFloatArray virtual_index_;
mutable AudioFloatArray index_;
mutable AudioFloatArray v1_;
mutable AudioFloatArray v2_;
mutable AudioFloatArray f_;
};
} // namespace blink
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment