Commit b9345ef3 authored by Raymond Toy, committed by Commit Bot

Don't allocate on audio thread and align work arrays

In WaveShaperCurveValues(), there are 5 work arrays that used to be
allocated on the audio thread.  Move these to the class so we don't do
that anymore.  And while we're at it, make them AudioFloatArrays so
they're appropriately aligned for best SIMD/NEON performance.

Running Spotify's Web Audio Benchmark shows that we get a 10% improvement
in speed for WaveShaper-1x, and about 3-4% for WaveShaper-2x and 4x.

No functional changes; all tests pass.

Bug: 1013118
Change-Id: Ide6340ee79d567068e0661abd0b20f14bb064166
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2210821
Commit-Queue: Raymond Toy <rtoy@chromium.org>
Reviewed-by: Hongchan Choi <hongchan@chromium.org>
Cr-Commit-Position: refs/heads/master@{#771062}
parent 547e9589
...@@ -36,7 +36,14 @@ ...@@ -36,7 +36,14 @@
namespace blink { namespace blink {
WaveShaperDSPKernel::WaveShaperDSPKernel(WaveShaperProcessor* processor) WaveShaperDSPKernel::WaveShaperDSPKernel(WaveShaperProcessor* processor)
: AudioDSPKernel(processor), tail_time_(0) { : AudioDSPKernel(processor),
tail_time_(0),
// 4 times render size to handle 4x oversampling.
virtual_index_(4 * audio_utilities::kRenderQuantumFrames),
index_(4 * audio_utilities::kRenderQuantumFrames),
v1_(4 * audio_utilities::kRenderQuantumFrames),
v2_(4 * audio_utilities::kRenderQuantumFrames),
f_(4 * audio_utilities::kRenderQuantumFrames) {
if (processor->Oversample() != WaveShaperProcessor::kOverSampleNone) if (processor->Oversample() != WaveShaperProcessor::kOverSampleNone)
LazyInitializeOversampling(); LazyInitializeOversampling();
} }
...@@ -114,8 +121,9 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination, ...@@ -114,8 +121,9 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
uint32_t frames_to_process, uint32_t frames_to_process,
const float* curve_data, const float* curve_data,
int curve_length) const { int curve_length) const {
DCHECK_LE(frames_to_process, virtual_index_.size());
// Index into the array computed from the source value. // Index into the array computed from the source value.
float virtual_index[frames_to_process]; float* virtual_index = virtual_index_.Data();
// virtual_index[k] = // virtual_index[k] =
// clampTo(0.5 * (source[k] + 1) * (curve_length - 1), // clampTo(0.5 * (source[k] + 1) * (curve_length - 1),
...@@ -134,16 +142,20 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination, ...@@ -134,16 +142,20 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
frames_to_process); frames_to_process);
// index = floor(virtual_index) // index = floor(virtual_index)
float index[frames_to_process]; DCHECK_LE(frames_to_process, index_.size());
float* index = index_.Data();
// v1 and v2 hold the curve_data corresponding to the closest curve // v1 and v2 hold the curve_data corresponding to the closest curve
// values to the source sample. To save memory, v1 will use the // values to the source sample. To save memory, v1 will use the
// destination array. // destination array.
float* v1 = destination; DCHECK_LE(frames_to_process, v1_.size());
float v2[frames_to_process]; DCHECK_LE(frames_to_process, v2_.size());
float* v1 = v1_.Data();
float* v2 = v2_.Data();
// Interpolation factor: virtual_index - index. // Interpolation factor: virtual_index - index.
float f[frames_to_process]; DCHECK_LE(frames_to_process, f_.size());
float* f = f_.Data();
int max_index = curve_length - 1; int max_index = curve_length - 1;
unsigned k = 0; unsigned k = 0;
...@@ -216,9 +228,10 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination, ...@@ -216,9 +228,10 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
int32x4_t index2 = vaddq_s32(index1, one); int32x4_t index2 = vaddq_s32(index1, one);
index2 = vmaxq_s32(vminq_s32(index2, max), zero); index2 = vmaxq_s32(vminq_s32(index2, max), zero);
// Save index1/2 so we can get the individual parts. // Save index1/2 so we can get the individual parts. Aligned to
int32_t i1[4]; // 16 bytes for vst1q instruction.
int32_t i2[4]; int32_t i1[4] __attribute__((aligned(16)));
int32_t i2[4] __attribute__((aligned(16)));
vst1q_s32(i1, index1); vst1q_s32(i1, index1);
vst1q_s32(i2, index2); vst1q_s32(i2, index2);
...@@ -257,7 +270,7 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination, ...@@ -257,7 +270,7 @@ void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
// = v1[k] + f[k]*(v2[k] - v1[k]) // = v1[k] + f[k]*(v2[k] - v1[k])
vector_math::Vsub(v2, 1, v1, 1, v2, 1, frames_to_process); vector_math::Vsub(v2, 1, v1, 1, v2, 1, frames_to_process);
vector_math::Vmul(f, 1, v2, 1, v2, 1, frames_to_process); vector_math::Vmul(f, 1, v2, 1, v2, 1, frames_to_process);
vector_math::Vadd(v2, 1, destination, 1, destination, 1, frames_to_process); vector_math::Vadd(v2, 1, v1, 1, destination, 1, frames_to_process);
} }
void WaveShaperDSPKernel::ProcessCurve(const float* source, void WaveShaperDSPKernel::ProcessCurve(const float* source,
......
...@@ -104,6 +104,16 @@ class WaveShaperDSPKernel final : public AudioDSPKernel { ...@@ -104,6 +104,16 @@ class WaveShaperDSPKernel final : public AudioDSPKernel {
// has an infinite tail so that silent input continues to produce non-silent // has an infinite tail so that silent input continues to produce non-silent
// output. // output.
double tail_time_; double tail_time_;
// Work arrays needed by WaveShaperCurveValues(). Mutable so this
// const function can modify these arrays. There's no state or
// anything kept here. See WaveShaperCurveValues() for details on
// what these hold.
mutable AudioFloatArray virtual_index_;
mutable AudioFloatArray index_;
mutable AudioFloatArray v1_;
mutable AudioFloatArray v2_;
mutable AudioFloatArray f_;
}; };
} // namespace blink } // namespace blink
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment