Varispeed support for SincResampler.

Provides a 3x speedup on all platforms for subsequent kernel creations with only an extra 8k in memory.

BUG=none
TEST=media_unittests

Review URL: https://codereview.chromium.org/13741004

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@194690 0039d316-1c4b-4281-b951-d872f2087c98

Varispeed support for SincResampler.
Provides a 3x speedup on all platforms for subsequent kernel creations with only an extra 8k in memory.

BUG=none
TEST=media_unittests

Review URL: https://codereview.chromium.org/13741004

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@194690 0039d316-1c4b-4281-b951-d872f2087c98
bcae835d · dalecurtis@google.com · 0463af04 · bcae835d · bcae835d · bcae835d
Commit bcae835d authored Apr 17, 2013 by dalecurtis@google.com
5 changed files
--- a/media/base/multi_channel_resampler.cc
+++ b/media/base/multi_channel_resampler.cc
@@ -101,4 +101,9 @@ void MultiChannelResampler::Flush() {
    resamplers_[i]->Flush();
 }
+void MultiChannelResampler::SetRatio(double io_sample_rate_ratio) {
+  for (size_t i = 0; i < resamplers_.size(); ++i)  // Every SincResampler.
+    resamplers_[i]->SetRatio(io_sample_rate_ratio);
+}
 }  // namespace media
--- a/media/base/multi_channel_resampler.h
+++ b/media/base/multi_channel_resampler.h
@@ -35,9 +35,15 @@ class MEDIA_EXPORT MultiChannelResampler {
  // Resamples |frames| of data from |read_cb_| into AudioBus.
  void Resample(AudioBus* audio_bus, int frames);
-  // Flush all buffered data and reset internal indices.
+  // Flush all buffered data and reset internal indices.  Not thread safe, do
+  // not call while Resample() is in progress.
  void Flush();
+  // Update ratio for all SincResamplers.  SetRatio() will cause reconstruction
+  // of the kernels used for resampling.  Not thread safe, do not call while
+  // Resample() is in progress.
+  void SetRatio(double io_sample_rate_ratio);
 private:
  // SincResampler::ReadCB implementation.  ProvideInput() will be called for
  // each channel (in channel order) as SincResampler needs more data.

--- a/media/base/sinc_resampler.cc
+++ b/media/base/sinc_resampler.cc
@@ -37,6 +37,7 @@
 #include "media/base/sinc_resampler.h"
 #include <cmath>
+#include <limits>
 #include "base/cpu.h"
 #include "base/logging.h"
@@ -47,6 +48,22 @@
 namespace media {
+static double SincScaleFactor(double io_ratio) {
+  // |sinc_scale_factor| is basically the normalized cutoff frequency of the
+  // low-pass filter: 1.0 / |io_ratio| when |io_ratio| > 1.0, otherwise 1.0.
+  double sinc_scale_factor = io_ratio > 1.0 ? 1.0 / io_ratio : 1.0;
+  // The sinc function is an idealized brick-wall filter, but since we're
+  // windowing it the transition from pass to stop does not happen right away.
+  // So we should adjust the low pass filter cutoff slightly downward to avoid
+  // some aliasing at the very high-end.
+  // TODO(crogers): this value is empirical and to be more exact should vary
+  // depending on kKernelSize.
+  sinc_scale_factor *= 0.9;
+  return sinc_scale_factor;
+}
 SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb)
    : io_sample_rate_ratio_(io_sample_rate_ratio),
      virtual_source_idx_(0),
@@ -55,6 +72,10 @@ SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb)
      // Create input buffers with a 16-byte alignment for SSE optimizations.
      kernel_storage_(static_cast<float*>(
          base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),
+      kernel_pre_sinc_storage_(static_cast<float*>(
+          base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),
+      kernel_window_storage_(static_cast<float*>(
+          base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),
      input_buffer_(static_cast<float*>(
          base::AlignedAlloc(sizeof(float) * kBufferSize, 16))),
 #if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__)
@@ -89,6 +110,10 @@ SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb)
  memset(kernel_storage_.get(), 0,
         sizeof(*kernel_storage_.get()) * kKernelStorageSize);
+  memset(kernel_pre_sinc_storage_.get(), 0,
+         sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);
+  memset(kernel_window_storage_.get(), 0,
+         sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);
  memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize);
  InitializeKernel();
@@ -103,38 +128,59 @@ void SincResampler::InitializeKernel() {
  static const double kA1 = 0.5;
  static const double kA2 = 0.5 * kAlpha;
-  // |sinc_scale_factor| is basically the normalized cutoff frequency of the
-  // low-pass filter.
-  double sinc_scale_factor =
-      io_sample_rate_ratio_ > 1.0 ? 1.0 / io_sample_rate_ratio_ : 1.0;
-  // The sinc function is an idealized brick-wall filter, but since we're
-  // windowing it the transition from pass to stop does not happen right away.
-  // So we should adjust the low pass filter cutoff slightly downward to avoid
-  // some aliasing at the very high-end.
-  // TODO(crogers): this value is empirical and to be more exact should vary
-  // depending on kKernelSize.
-  sinc_scale_factor *= 0.9;
  // Generates a set of windowed sinc() kernels.
  // We generate a range of sub-sample offsets from 0.0 to 1.0.
+  const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
  for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
-    double subsample_offset =
+    const float subsample_offset =
-        static_cast<double>(offset_idx) / kKernelOffsetCount;
+        static_cast<float>(offset_idx) / kKernelOffsetCount;
    for (int i = 0; i < kKernelSize; ++i) {
-      // Compute the sinc with offset.
+      const int idx = i + offset_idx * kKernelSize;
-      double s =
+      const float pre_sinc = M_PI * (i - kKernelSize / 2 - subsample_offset);
-          sinc_scale_factor * M_PI * (i - kKernelSize / 2 - subsample_offset);
+      kernel_pre_sinc_storage_[idx] = pre_sinc;
-      double sinc = (!s ? 1.0 : sin(s) / s) * sinc_scale_factor;
      // Compute Blackman window, matching the offset of the sinc().
-      double x = (i - subsample_offset) / kKernelSize;
+      const float x = (i - subsample_offset) / kKernelSize;
-      double window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2
+      const float window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2
          * cos(4.0 * M_PI * x);
+      kernel_window_storage_[idx] = window;
+      // Compute the sinc with offset, then window the sinc() function and store
+      // at the correct offset.
+      if (pre_sinc == 0) {
+        kernel_storage_[idx] = sinc_scale_factor * window;
+      } else {
+        kernel_storage_[idx] =
+            window * sin(sinc_scale_factor * pre_sinc) / pre_sinc;
+      }
+    }
+  }
+}
+void SincResampler::SetRatio(double io_sample_rate_ratio) {
+  if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <
+      std::numeric_limits<double>::epsilon()) {
+    return;  // Ratio is effectively unchanged; keep the current kernels.
+  }
+  io_sample_rate_ratio_ = io_sample_rate_ratio;
-      // Window the sinc() function and store at the correct offset.
+  // Recompute only the sin() term; the cached window and pre-sinc values are
-      kernel_storage_.get()[i + offset_idx * kKernelSize] = sinc * window;
+  // independent of |sinc_scale_factor|.  Provides a 3x speedup.
+  const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
+  for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
+    for (int i = 0; i < kKernelSize; ++i) {
+      const int idx = i + offset_idx * kKernelSize;
+      const float window = kernel_window_storage_[idx];
+      const float pre_sinc = kernel_pre_sinc_storage_[idx];
+      if (pre_sinc == 0) {  // sin(k * x) / x -> k as x -> 0.
+        kernel_storage_[idx] = sinc_scale_factor * window;
+      } else {
+        kernel_storage_[idx] =
+            window * sin(sinc_scale_factor * pre_sinc) / pre_sinc;
+      }
    }
  }
 }

--- a/media/base/sinc_resampler.h
+++ b/media/base/sinc_resampler.h
@@ -60,9 +60,17 @@ class MEDIA_EXPORT SincResampler {
  // single call to |read_cb_| for more data.
  int ChunkSize() const;
-  // Flush all buffered data and reset internal indices.
+  // Flush all buffered data and reset internal indices.  Not thread safe, do
+  // not call while Resample() is in progress.
  void Flush();
+  // Update |io_sample_rate_ratio_|.  SetRatio() will cause a reconstruction of
+  // the kernels used for resampling.  Not thread safe, do not call while
+  // Resample() is in progress.
+  void SetRatio(double io_sample_rate_ratio);
+  float* get_kernel_for_testing() { return kernel_storage_.get(); }
 private:
  FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, Convolve);
  FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, ConvolveBenchmark);
@@ -86,7 +94,7 @@ class MEDIA_EXPORT SincResampler {
 #endif
  // The ratio of input / output sample rates.
-  const double io_sample_rate_ratio_;
+  double io_sample_rate_ratio_;
  // An index on the source input buffer with sub-sample precision.  It must be
  // double precision to avoid drift.
@@ -101,10 +109,12 @@ class MEDIA_EXPORT SincResampler {
  // Contains kKernelOffsetCount kernels back-to-back, each of size kKernelSize.
  // The kernel offsets are sub-sample shifts of a windowed sinc shifted from
  // 0.0 to 1.0 sample.
-  scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> kernel_storage_;
+  scoped_ptr<float[], base::ScopedPtrAlignedFree> kernel_storage_;
+  scoped_ptr<float[], base::ScopedPtrAlignedFree> kernel_pre_sinc_storage_;
+  scoped_ptr<float[], base::ScopedPtrAlignedFree> kernel_window_storage_;
  // Data from the source is copied into this buffer for each processing pass.
-  scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_buffer_;
+  scoped_ptr<float[], base::ScopedPtrAlignedFree> input_buffer_;
  // Stores the runtime selection of which Convolve function to use.
 #if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__)

--- a/media/base/sinc_resampler_unittest.cc
+++ b/media/base/sinc_resampler_unittest.cc
@@ -98,6 +98,22 @@ TEST(SincResamplerTest, Flush) {
    ASSERT_FLOAT_EQ(resampled_destination[i], 0);
 }
+// Benchmarks SetRatio() over 9999 distinct ratios and prints the total time.
+TEST(SincResamplerTest, DISABLED_SetRatioBench) {
+  MockSource mock_source;
+  SincResampler resampler(
+      kSampleRateRatio,
+      base::Bind(&MockSource::ProvideInput, base::Unretained(&mock_source)));
+  base::TimeTicks start = base::TimeTicks::HighResNow();
+  for (int i = 1; i < 10000; ++i)
+    resampler.SetRatio(1.0 / i);
+  double total_time_c_ms =
+      (base::TimeTicks::HighResNow() - start).InMillisecondsF();
+  printf("SetRatio() took %.2fms.\n", total_time_c_ms);
+}
 // Define platform independent function name for Convolve* tests.
 #if defined(ARCH_CPU_X86_FAMILY)
 #define CONVOLVE_FUNC Convolve_SSE
@@ -299,11 +315,24 @@ TEST_P(SincResamplerTest, Resample) {
  SinusoidalLinearChirpSource resampler_source(
      input_rate_, input_samples, input_nyquist_freq);
+  const double io_ratio = input_rate_ / static_cast<double>(output_rate_);
  SincResampler resampler(
-      input_rate_ / static_cast<double>(output_rate_),
+      io_ratio,
      base::Bind(&SinusoidalLinearChirpSource::ProvideInput,
                 base::Unretained(&resampler_source)));
+  // Force an update to the sample rate ratio to ensure dynamic sample rate
+  // changes are working correctly.  memcpy()/memcmp() take a byte count, so
+  // the element count is scaled by sizeof(float) to cover the whole kernel
+  // table rather than only its first quarter.
+  const size_t kernel_bytes =
+      sizeof(float) * SincResampler::kKernelStorageSize;
+  scoped_ptr<float[]> kernel(new float[SincResampler::kKernelStorageSize]);
+  memcpy(kernel.get(), resampler.get_kernel_for_testing(), kernel_bytes);
+  resampler.SetRatio(M_PI);
+  ASSERT_NE(0, memcmp(kernel.get(), resampler.get_kernel_for_testing(),
+                      kernel_bytes));
+  resampler.SetRatio(io_ratio);
+  ASSERT_EQ(0, memcmp(kernel.get(), resampler.get_kernel_for_testing(),
+                      kernel_bytes));
  // TODO(dalecurtis): If we switch to AVX/SSE optimization, we'll need to
  // allocate these on 32-byte boundaries and ensure they're sized % 32 bytes.
  scoped_ptr<float[]> resampled_destination(new float[output_samples]);