Reland "Add build file for PFFFT"

This is a reland of 8e4cf155 This CL includes the following changes: 1. when fuzzing, PFFFT is compiled disabling SIMD 2. SIMD is also disabled on fuchsia and android because PFFFT only checks __arm__ 3. unit test to validate the output of PFFFT (compared against that of FFTPACK) The first change fixes the problem due to which the original CL has been reverted; however, it makes fuzzing slower and reduces the coverage since SIMD cannot be used. Similarly, the second change is a temporary solution to allow landing this CL. SIMD will be re-enabled in a follow-up CL. Original change's description: > Add build file for PFFFT > > - fuzzer corpus generator and fuzzer targets > - fftpack isolated as private test only target (only needed for the benchmark) > > Bug: webrtc:9577 > Change-Id: Idc904bc4b05f945a7461a14893518551bbe34b84 > Reviewed-on: https://chromium-review.googlesource.com/c/1452000 > Commit-Queue: Ale Bzk <alessiob@chromium.org> > Reviewed-by: Nico Weber <thakis@chromium.org> > Reviewed-by: Olga Sharonova <olka@chromium.org> > Reviewed-by: Max Moroz <mmoroz@chromium.org> > Cr-Commit-Position: refs/heads/master@{#629627} Bug: webrtc:9577 Change-Id: Icfbb4b966c3ad866e9e2970b63363e0e258b1fea Reviewed-on: https://chromium-review.googlesource.com/c/1458076 Commit-Queue: Ale Bzk <alessiob@chromium.org> Reviewed-by: Nico Weber <thakis@chromium.org> Reviewed-by: Max Moroz <mmoroz@chromium.org> Reviewed-by: Max Morin <maxmorin@chromium.org> Cr-Commit-Position: refs/heads/master@{#630085}

Reland "Add build file for PFFFT"
This is a reland of 8e4cf155 This CL includes the following changes: 1. when fuzzing, PFFFT is compiled disabling SIMD 2. SIMD is also disabled on fuchsia and android because PFFFT only checks __arm__ 3. unit test to validate the output of PFFFT (compared against that of FFTPACK) The first change fixes the problem due to which the original CL has been reverted; however, it makes fuzzing slower and reduces the coverage since SIMD cannot be used. Similarly, the second change is a temporary solution to allow landing this CL. SIMD will be re-enabled in a follow-up CL. Original change's description: > Add build file for PFFFT > > - fuzzer corpus generator and fuzzer targets > - fftpack isolated as private test only target (only needed for the benchmark) > > Bug: webrtc:9577 > Change-Id: Idc904bc4b05f945a7461a14893518551bbe34b84 > Reviewed-on: https://chromium-review.googlesource.com/c/1452000 > Commit-Queue: Ale Bzk <alessiob@chromium.org> > Reviewed-by: Nico Weber <thakis@chromium.org> > Reviewed-by: Olga Sharonova <olka@chromium.org> > Reviewed-by: Max Moroz <mmoroz@chromium.org> > Cr-Commit-Position: refs/heads/master@{#629627} Bug: webrtc:9577 Change-Id: Icfbb4b966c3ad866e9e2970b63363e0e258b1fea Reviewed-on: https://chromium-review.googlesource.com/c/1458076 Commit-Queue: Ale Bzk <alessiob@chromium.org> Reviewed-by: Nico Weber <thakis@chromium.org> Reviewed-by: Max Moroz <mmoroz@chromium.org> Reviewed-by: Max Morin <maxmorin@chromium.org> Cr-Commit-Position: refs/heads/master@{#630085}
c1086af0 · Alessio Bazzica · Commit Bot · 2c3b57ca · c1086af0 · c1086af0
Commit c1086af0 authored Feb 07, 2019 by Alessio Bazzica Committed by Commit Bot Feb 07, 2019
13 changed files
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -775,6 +775,13 @@ group("gn_all") {
      "//tools/accessibility/inspect:ax_dump_tree",
    ]
  }
+
+  # PFFFT.
+  deps += [
+    "//third_party/pffft:fuzzers",
+    "//third_party/pffft:pffft_benchmark",
+    "//third_party/pffft:pffft_unittest",
+  ]
 }

 if ((is_linux || is_win) && enable_remoting && !use_ozone) {

--- a/third_party/pffft/BUILD.gn
+++ b/third_party/pffft/BUILD.gn
+# Copyright 2019 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//testing/libfuzzer/fuzzer_test.gni")
+import("//testing/test.gni")
+
+# TODO(https://crbug.com/webrtc/9577): Add architecture specific flags.
+
+config("simd_config") {
+  # TODO(https://crbug.com/webrtc/9577): Increase fuzzer coverage by using SIMD.
+  # TODO(https://crbug.com/webrtc/9577): Fix SIMD not detected on android arm64.
+  # TODO(https://crbug.com/webrtc/9577): Fix SIMD not detected on fuchsia arm64.
+  if (use_fuzzing_engine || is_android || is_fuchsia) {
+    cflags = [ "-DPFFFT_SIMD_DISABLE" ]
+  }
+}
+
+static_library("pffft") {
+  configs += [ ":simd_config" ]
+  sources = [
+    "src/pffft.c",
+    "src/pffft.h",
+  ]
+}
+
+# Fuzzing.
+
+group("fuzzers") {
+  testonly = true
+  deps = [
+    ":pffft_complex_fuzzer",
+    ":pffft_real_fuzzer",
+  ]
+}
+
+fuzzer_testdata_dir = "$target_gen_dir/testdata"
+
+action("generate_pffft_fuzzer_corpus") {
+  script = "generate_seed_corpus.py"
+  sources = [
+    "generate_seed_corpus.py",
+  ]
+  args = [ rebase_path(fuzzer_testdata_dir, root_build_dir) ]
+  outputs = [
+    fuzzer_testdata_dir,
+  ]
+}
+
+fuzzer_test("pffft_complex_fuzzer") {
+  sources = [
+    "pffft_fuzzer.cc",
+  ]
+  cflags = [ "-DTRANSFORM_COMPLEX" ]
+  deps = [
+    ":pffft",
+  ]
+  seed_corpus = fuzzer_testdata_dir
+  seed_corpus_deps = [ ":generate_pffft_fuzzer_corpus" ]
+}
+
+fuzzer_test("pffft_real_fuzzer") {
+  sources = [
+    "pffft_fuzzer.cc",
+  ]
+  cflags = [ "-DTRANSFORM_REAL" ]
+  deps = [
+    ":pffft",
+  ]
+  seed_corpus = fuzzer_testdata_dir
+  seed_corpus_deps = [ ":generate_pffft_fuzzer_corpus" ]
+}
+
+# Unit tests and benchmark.
+
+# This target must be used only for testing and benchmark purposes.
+static_library("fftpack") {
+  testonly = true
+  sources = [
+    "src/fftpack.c",
+    "src/fftpack.h",
+  ]
+  visibility = [ ":*" ]
+}
+
+executable("pffft_benchmark") {
+  configs += [ ":simd_config" ]
+  testonly = true
+  sources = [
+    "src/test_pffft.c",
+  ]
+  deps = [
+    ":fftpack",
+    ":pffft",
+  ]
+}
+
+test("pffft_unittest") {
+  sources = [
+    "pffft_unittest.cc",
+  ]
+  deps = [
+    ":fftpack",
+    ":pffft",
+    "//testing/gtest",
+    "//testing/gtest:gtest_main",
+  ]
+}
--- a/third_party/pffft/DEPS
+++ b/third_party/pffft/DEPS
+include_rules = [
+  "+testing/gtest/include/gtest",
+]
--- a/third_party/pffft/README.chromium
+++ b/third_party/pffft/README.chromium
@@ -13,3 +13,6 @@ Description:
 PFFFT does 1D Fast Fourier Transforms, of single precision real and complex vectors. It tries do it fast, it tries to be correct, and it tries to be small. Computations do take advantage of SSE1 instructions on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus.

 Local Modifications:
+ * 01-add_m_pi.diff: define M_PI if not defined
+ * 02-rmv_printf.diff: remove printf and stop including stdio.h
+ * 03-decl_validate_simd.diff: declare validate_pffft_simd() in pffft.h
--- a/third_party/pffft/generate_seed_corpus.py
+++ b/third_party/pffft/generate_seed_corpus.py
+#!/usr/bin/env python
+# Copyright 2019 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from __future__ import division
+from __future__ import print_function
+
+from array import array
+import os
+import random
+import sys
+
+MAX_INPUT_SIZE = int(1e6)
+MAX_FLOAT32 = 3.4028235e+38
+
+
+def IsValidSize(n):
+  if n == 0:
+    return False
+  # PFFFT only supports transforms for inputs of length N of the form
+  # N = (2^a)*(3^b)*(5^c) where a >= 5, b >=0, c >= 0.
+  FACTORS = [2, 3, 5]
+  factorization = [0, 0, 0]
+  for i, factor in enumerate(FACTORS):
+    while n % factor == 0:
+      n = n // factor
+      factorization[i] += 1
+  return factorization[0] >= 5 and n == 1
+
+
+def WriteFloat32ArrayToFile(file_path, size, generator):
+  """Generate an array of float32 values and writes to file."""
+  with open(file_path, 'wb') as f:
+    float_array = array('f', [generator() for _ in range(size)])
+    float_array.tofile(f)
+
+
+def main():
+  if len(sys.argv) < 2:
+    print('Usage: %s <path to output directory>' % sys.argv[0])
+    sys.exit(1)
+
+  output_path = sys.argv[1]
+  # Create output directory if missing.
+  if not os.path.exists(output_path):
+    os.makedirs(output_path)
+
+  # List of valid input sizes.
+  N = [n for n in range(MAX_INPUT_SIZE) if IsValidSize(n)]
+
+  # Set the seed to always generate the same random data.
+  random.seed(0)
+
+  # Generate different types of input arrays for each target length.
+  for n in N:
+    # Zeros.
+    WriteFloat32ArrayToFile(
+        os.path.join(output_path, 'zeros_%d' % n), n, lambda: 0)
+    # Max float 32.
+    WriteFloat32ArrayToFile(
+        os.path.join(output_path, 'max_%d' % n), n, lambda: MAX_FLOAT32)
+    # Random values in the s16 range.
+    rnd_s16 = lambda: 32768.0 * 2.0 * (random.random() - 0.5)
+    WriteFloat32ArrayToFile(
+        os.path.join(output_path, 'rnd_s16_%d' % n), n, rnd_s16)
+
+  sys.exit(0)
+
+
+if __name__ == '__main__':
+  main()
--- a/third_party/pffft/patches/01-add_m_pi.diff
+++ b/third_party/pffft/patches/01-add_m_pi.diff
+diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c
+index 0603f2b1f698..7934db448a09 100644
+--- a/third_party/pffft/src/pffft.c
+++ b/third_party/pffft/src/pffft.c
+@@ -82,6 +82,9 @@
+ #  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__))
+ #endif
+ 
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+ 
+ /* 
+    vector support macros: the rest of the code is independant of
--- a/third_party/pffft/patches/02-rmv_printf.diff
+++ b/third_party/pffft/patches/02-rmv_printf.diff
+diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c
+index 7934db448a09..2e0c2f651438 100644
+--- a/third_party/pffft/src/pffft.c
+++ b/third_party/pffft/src/pffft.c
+@@ -59,7 +59,7 @@
+ 
+ #include "pffft.h"
+ #include <stdlib.h>
+-#include <stdio.h>
+// #include <stdio.h>
+ #include <math.h>
+ #include <assert.h>
+ 
+@@ -222,31 +222,35 @@ void validate_pffft_simd() {
+   memcpy(a3.f, f+12, 4*sizeof(float));
+ 
+   t = a0; u = a1; t.v = VZERO();
+-  printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0);
+  // printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 0, 0, 0, 0);
+   t.v = VADD(a1.v, a2.v);
+-  printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18);
+  // printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 12, 14, 16, 18);
+   t.v = VMUL(a1.v, a2.v);
+-  printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
+  // printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 32, 45, 60, 77);
+   t.v = VMADD(a1.v, a2.v,a0.v);
+-  printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
+  // printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 32, 46, 62, 80);
+ 
+   INTERLEAVE2(a1.v,a2.v,t.v,u.v);
+-  printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+  // printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+   assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
+   UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
+-  printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+  // printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+   assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);
+ 
+   t.v=LD_PS1(f[15]);
+-  printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  // printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+   assertv4(t, 15, 15, 15, 15);
+   t.v = VSWAPHL(a1.v, a2.v);
+-  printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  // printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+   assertv4(t, 8, 9, 6, 7);
+   VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
+-  printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", 
+-         a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], 
+-         a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); 
+  // printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", 
+  //        a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], 
+  //        a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); 
+   assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
+ }
+ #endif //!PFFFT_SIMD_DISABLE
--- a/third_party/pffft/patches/03-decl_validate_simd.diff
+++ b/third_party/pffft/patches/03-decl_validate_simd.diff
+diff --git a/third_party/pffft/src/pffft.h b/third_party/pffft/src/pffft.h
+index 2bfa7b3ebcfb..bb6f78d4b795 100644
+--- a/third_party/pffft/src/pffft.h
+++ b/third_party/pffft/src/pffft.h
+@@ -83,6 +83,11 @@
+ extern "C" {
+ #endif
+ 
+#ifndef PFFFT_SIMD_DISABLE
+  // Detects compiler bugs with respect to simd instruction.
+  void validate_pffft_simd();
+#endif
+
+   /* opaque struct holding internal stuff (precomputed twiddle factors)
+      this struct can be shared by many threads as it contains only
+      read-only data.  
--- a/third_party/pffft/pffft_fuzzer.cc
+++ b/third_party/pffft/pffft_fuzzer.cc
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cstring>
+
+#include "third_party/pffft/src/pffft.h"
+
+namespace {
+
+#if defined(TRANSFORM_REAL)
+// Real FFT.
+constexpr pffft_transform_t kTransform = PFFFT_REAL;
+constexpr size_t kSizeOfOneSample = sizeof(float);
+#elif defined(TRANSFORM_COMPLEX)
+// Complex FFT.
+constexpr pffft_transform_t kTransform = PFFFT_COMPLEX;
+constexpr size_t kSizeOfOneSample = 2 * sizeof(float);  // Real plus imaginary.
+#else
+#error FFT transform type not defined.
+#endif
+
+bool IsValidSize(size_t n) {
+  if (n == 0) {
+    return false;
+  }
+  // PFFFT only supports transforms for inputs of length N of the form
+  // N = (2^a)*(3^b)*(5^c) where a >= 5, b >=0, c >= 0.
+  constexpr std::array<int, 3> kFactors = {2, 3, 5};
+  std::array<int, kFactors.size()> factorization{};
+  for (size_t i = 0; i < kFactors.size(); ++i) {
+    const int factor = kFactors[i];
+    while (n % factor == 0) {
+      n /= factor;
+      factorization[i]++;
+    }
+  }
+  return factorization[0] >= 5 && n == 1;
+}
+
+float* AllocatePffftBuffer(size_t number_of_bytes) {
+  return static_cast<float*>(pffft_aligned_malloc(number_of_bytes));
+}
+
+}  // namespace
+
+// Entry point for LibFuzzer.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  // Set the number of FFT points to use |data| as input vector.
+  // The latter is truncated if the number of bytes is not an integer
+  // multiple of the size of one sample (which is either a real or a complex
+  // floating point number).
+  const size_t fft_size = size / kSizeOfOneSample;
+  if (!IsValidSize(fft_size)) {
+    return 0;
+  }
+
+  const size_t number_of_bytes = fft_size * kSizeOfOneSample;
+  assert(number_of_bytes <= size);
+
+  // Allocate input and output buffers.
+  float* in = AllocatePffftBuffer(number_of_bytes);
+  float* out = AllocatePffftBuffer(number_of_bytes);
+
+  // Copy input data.
+  std::memcpy(in, reinterpret_cast<const float*>(data), number_of_bytes);
+
+  // Setup FFT.
+  PFFFT_Setup* pffft_setup = pffft_new_setup(fft_size, kTransform);
+
+  // Call different PFFFT functions to maximize the coverage.
+  pffft_transform(pffft_setup, in, out, nullptr, PFFFT_FORWARD);
+  pffft_zconvolve_accumulate(pffft_setup, out, out, out, 1.f);
+  pffft_transform_ordered(pffft_setup, in, out, nullptr, PFFFT_BACKWARD);
+
+  // Release memory.
+  pffft_aligned_free(in);
+  pffft_aligned_free(out);
+  pffft_destroy_setup(pffft_setup);
+
+  return 0;
+}
--- a/third_party/pffft/pffft_unittest.cc
+++ b/third_party/pffft/pffft_unittest.cc
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <cmath>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/pffft/src/fftpack.h"
+#include "third_party/pffft/src/pffft.h"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+
+namespace pffft {
+namespace test {
+namespace {
+
+static constexpr int kFftSizes[] = {
+    16,  32,      64,  96,  128,  160,  192,  256,  288,  384,   5 * 96, 512,
+    576, 5 * 128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864};
+
+double frand() {
+  return rand() / (double)RAND_MAX;
+}
+
+void PffftValidate(int fft_size, bool complex_fft) {
+  PFFFT_Setup* pffft_status =
+      pffft_new_setup(fft_size, complex_fft ? PFFFT_COMPLEX : PFFFT_REAL);
+  ASSERT_TRUE(pffft_status) << "FFT size (" << fft_size << ") not supported.";
+
+  int num_floats = fft_size * (complex_fft ? 2 : 1);
+  int num_bytes = num_floats * sizeof(float);
+  float* ref = static_cast<float*>(pffft_aligned_malloc(num_bytes));
+  float* in = static_cast<float*>(pffft_aligned_malloc(num_bytes));
+  float* out = static_cast<float*>(pffft_aligned_malloc(num_bytes));
+  float* tmp = static_cast<float*>(pffft_aligned_malloc(num_bytes));
+  float* tmp2 = static_cast<float*>(pffft_aligned_malloc(num_bytes));
+
+  for (int pass = 0; pass < 2; ++pass) {
+    SCOPED_TRACE(pass);
+    float ref_max = 0;
+    int k;
+
+    // Compute reference solution with FFTPACK.
+    if (pass == 0) {
+      float* fftpack_buff =
+          static_cast<float*>(malloc(2 * num_bytes + 15 * sizeof(float)));
+      for (k = 0; k < num_floats; ++k) {
+        ref[k] = in[k] = frand() * 2 - 1;
+        out[k] = 1e30;
+      }
+
+      if (!complex_fft) {
+        rffti(fft_size, fftpack_buff);
+        rfftf(fft_size, ref, fftpack_buff);
+        // Use our ordering for real FFTs instead of the one of fftpack.
+        {
+          float refN = ref[fft_size - 1];
+          for (k = fft_size - 2; k >= 1; --k)
+            ref[k + 1] = ref[k];
+          ref[1] = refN;
+        }
+      } else {
+        cffti(fft_size, fftpack_buff);
+        cfftf(fft_size, ref, fftpack_buff);
+      }
+
+      free(fftpack_buff);
+    }
+
+    for (k = 0; k < num_floats; ++k) {
+      ref_max = MAX(ref_max, fabs(ref[k]));
+    }
+
+    // Pass 0: non canonical ordering of transform coefficients.
+    if (pass == 0) {
+      // Test forward transform, with different input / output.
+      pffft_transform(pffft_status, in, tmp, 0, PFFFT_FORWARD);
+      memcpy(tmp2, tmp, num_bytes);
+      memcpy(tmp, in, num_bytes);
+      pffft_transform(pffft_status, tmp, tmp, 0, PFFFT_FORWARD);
+      for (k = 0; k < num_floats; ++k) {
+        SCOPED_TRACE(k);
+        EXPECT_EQ(tmp2[k], tmp[k]);
+      }
+
+      // Test reordering.
+      pffft_zreorder(pffft_status, tmp, out, PFFFT_FORWARD);
+      pffft_zreorder(pffft_status, out, tmp, PFFFT_BACKWARD);
+      for (k = 0; k < num_floats; ++k) {
+        SCOPED_TRACE(k);
+        EXPECT_EQ(tmp2[k], tmp[k]);
+      }
+      pffft_zreorder(pffft_status, tmp, out, PFFFT_FORWARD);
+    } else {
+      // Pass 1: canonical ordering of transform coefficients.
+      pffft_transform_ordered(pffft_status, in, tmp, 0, PFFFT_FORWARD);
+      memcpy(tmp2, tmp, num_bytes);
+      memcpy(tmp, in, num_bytes);
+      pffft_transform_ordered(pffft_status, tmp, tmp, 0, PFFFT_FORWARD);
+      for (k = 0; k < num_floats; ++k) {
+        SCOPED_TRACE(k);
+        EXPECT_EQ(tmp2[k], tmp[k]);
+      }
+      memcpy(out, tmp, num_bytes);
+    }
+
+    {
+      for (k = 0; k < num_floats; ++k) {
+        SCOPED_TRACE(k);
+        EXPECT_NEAR(ref[k], out[k], 1e-3 * ref_max) << "Forward FFT mismatch";
+      }
+
+      if (pass == 0) {
+        pffft_transform(pffft_status, tmp, out, 0, PFFFT_BACKWARD);
+      } else {
+        pffft_transform_ordered(pffft_status, tmp, out, 0, PFFFT_BACKWARD);
+      }
+      memcpy(tmp2, out, num_bytes);
+      memcpy(out, tmp, num_bytes);
+      if (pass == 0) {
+        pffft_transform(pffft_status, out, out, 0, PFFFT_BACKWARD);
+      } else {
+        pffft_transform_ordered(pffft_status, out, out, 0, PFFFT_BACKWARD);
+      }
+      for (k = 0; k < num_floats; ++k) {
+        assert(tmp2[k] == out[k]);
+        out[k] *= 1.f / fft_size;
+      }
+      for (k = 0; k < num_floats; ++k) {
+        SCOPED_TRACE(k);
+        EXPECT_NEAR(in[k], out[k], 1e-3 * ref_max) << "Reverse FFT mismatch";
+      }
+    }
+
+    // Quick test of the circular convolution in FFT domain.
+    {
+      float conv_err = 0, conv_max = 0;
+
+      pffft_zreorder(pffft_status, ref, tmp, PFFFT_FORWARD);
+      memset(out, 0, num_bytes);
+      pffft_zconvolve_accumulate(pffft_status, ref, ref, out, 1.0);
+      pffft_zreorder(pffft_status, out, tmp2, PFFFT_FORWARD);
+
+      for (k = 0; k < num_floats; k += 2) {
+        float ar = tmp[k], ai = tmp[k + 1];
+        if (complex_fft || k > 0) {
+          tmp[k] = ar * ar - ai * ai;
+          tmp[k + 1] = 2 * ar * ai;
+        } else {
+          tmp[0] = ar * ar;
+          tmp[1] = ai * ai;
+        }
+      }
+
+      for (k = 0; k < num_floats; ++k) {
+        float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]);
+        if (d > conv_err)
+          conv_err = d;
+        if (e > conv_max)
+          conv_max = e;
+      }
+      EXPECT_LT(conv_err, 1e-5 * conv_max) << "zconvolve error";
+    }
+  }
+
+  pffft_destroy_setup(pffft_status);
+  pffft_aligned_free(ref);
+  pffft_aligned_free(in);
+  pffft_aligned_free(out);
+  pffft_aligned_free(tmp);
+  pffft_aligned_free(tmp2);
+}
+
+}  // namespace
+
+TEST(PffftTest, ValidateReal) {
+  for (int fft_size : kFftSizes) {
+    SCOPED_TRACE(fft_size);
+    if (fft_size == 16) {
+      continue;
+    }
+    PffftValidate(fft_size, /*complex_fft=*/false);
+  }
+}
+
+TEST(PffftTest, ValidateComplex) {
+  for (int fft_size : kFftSizes) {
+    SCOPED_TRACE(fft_size);
+    PffftValidate(fft_size, /*complex_fft=*/true);
+  }
+}
+
+}  // namespace test
+}  // namespace pffft
--- a/third_party/pffft/src/pffft.c
+++ b/third_party/pffft/src/pffft.c
@@ -59,7 +59,7 @@

 #include "pffft.h"
 #include <stdlib.h>
-#include <stdio.h>
+// #include <stdio.h>
 #include <math.h>
 #include <assert.h>

@@ -82,6 +82,9 @@
 #  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__))
 #endif

+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif

 /* 
   vector support macros: the rest of the code is independant of
@@ -219,31 +222,35 @@ void validate_pffft_simd() {
  memcpy(a3.f, f+12, 4*sizeof(float));

  t = a0; u = a1; t.v = VZERO();
-  printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0);
+  // printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 0, 0, 0, 0);
  t.v = VADD(a1.v, a2.v);
-  printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18);
+  // printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 12, 14, 16, 18);
  t.v = VMUL(a1.v, a2.v);
-  printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
+  // printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 32, 45, 60, 77);
  t.v = VMADD(a1.v, a2.v,a0.v);
-  printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
+  // printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 32, 46, 62, 80);

  INTERLEAVE2(a1.v,a2.v,t.v,u.v);
-  printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+  // printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
  assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
  UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
-  printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+  // printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
  assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);

  t.v=LD_PS1(f[15]);
-  printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  // printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
  assertv4(t, 15, 15, 15, 15);
  t.v = VSWAPHL(a1.v, a2.v);
-  printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  // printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
  assertv4(t, 8, 9, 6, 7);
  VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
-  printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", 
-         a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], 
-         a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); 
+  // printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", 
+  //        a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], 
+  //        a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); 
  assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
 }
 #endif //!PFFFT_SIMD_DISABLE

--- a/third_party/pffft/src/pffft.h
+++ b/third_party/pffft/src/pffft.h
@@ -83,92 +83,113 @@
 extern "C" {
 #endif

-  /* opaque struct holding internal stuff (precomputed twiddle factors)
-     this struct can be shared by many threads as it contains only
-     read-only data.  
-  */
-  typedef struct PFFFT_Setup PFFFT_Setup;
-
-  /* direction of the transform */
-  typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
-  
-  /* type of transform */
-  typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
-
-  /*
-    prepare for performing transforms of size N -- the returned
-    PFFFT_Setup structure is read-only so it can safely be shared by
-    multiple concurrent threads. 
-  */
-  PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
-  void pffft_destroy_setup(PFFFT_Setup *);
-  /* 
-     Perform a Fourier transform , The z-domain data is stored in the
-     most efficient order for transforming it back, or using it for
-     convolution. If you need to have its content sorted in the
-     "usual" way, that is as an array of interleaved complex numbers,
-     either use pffft_transform_ordered , or call pffft_zreorder after
-     the forward fft, and before the backward fft.
-
-     Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
-     Typically you will want to scale the backward transform by 1/N.
-     
-     The 'work' pointer should point to an area of N (2*N for complex
-     fft) floats, properly aligned. If 'work' is NULL, then stack will
-     be used instead (this is probably the best strategy for small
-     FFTs, say for N < 16384).
-
-     input and output may alias.
-  */
-  void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
-
-  /* 
-     Similar to pffft_transform, but makes sure that the output is
-     ordered as expected (interleaved complex numbers).  This is
-     similar to calling pffft_transform and then pffft_zreorder.
-     
-     input and output may alias.
-  */
-  void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
-
-  /* 
-     call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
-     PFFFT_FORWARD) if you want to have the frequency components in
-     the correct "canonical" order, as interleaved complex numbers.
-     
-     (for real transforms, both 0-frequency and half frequency
-     components, which are real, are assembled in the first entry as
-     F(0)+i*F(n/2+1). Note that the original fftpack did place
-     F(n/2+1) at the end of the arrays).
-     
-     input and output should not alias.
-  */
-  void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
-
-  /* 
-     Perform a multiplication of the frequency components of dft_a and
-     dft_b and accumulate them into dft_ab. The arrays should have
-     been obtained with pffft_transform(.., PFFFT_FORWARD) and should
-     *not* have been reordered with pffft_zreorder (otherwise just
-     perform the operation yourself as the dft coefs are stored as
-     interleaved complex numbers).
-     
-     the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
-     
-     The dft_a, dft_b and dft_ab pointers may alias.
-  */
-  void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
-
-  /*
-    the float buffers must have the correct alignment (16-byte boundary
-    on intel and powerpc). This function may be used to obtain such
-    correctly aligned buffers.  
-  */
-  void *pffft_aligned_malloc(size_t nb_bytes);
-  void pffft_aligned_free(void *);
-
-  /* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */
-  int pffft_simd_size();
+#ifndef PFFFT_SIMD_DISABLE
+// Detects compiler bugs with respect to simd instruction.
+void validate_pffft_simd();
+#endif
+
+/* opaque struct holding internal stuff (precomputed twiddle factors)
+   this struct can be shared by many threads as it contains only
+   read-only data.
+*/
+typedef struct PFFFT_Setup PFFFT_Setup;
+
+/* direction of the transform */
+typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
+
+/* type of transform */
+typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
+
+/*
+  prepare for performing transforms of size N -- the returned
+  PFFFT_Setup structure is read-only so it can safely be shared by
+  multiple concurrent threads.
+*/
+PFFFT_Setup* pffft_new_setup(int N, pffft_transform_t transform);
+void pffft_destroy_setup(PFFFT_Setup*);
+/*
+   Perform a Fourier transform , The z-domain data is stored in the
+   most efficient order for transforming it back, or using it for
+   convolution. If you need to have its content sorted in the
+   "usual" way, that is as an array of interleaved complex numbers,
+   either use pffft_transform_ordered , or call pffft_zreorder after
+   the forward fft, and before the backward fft.
+
+   Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
+   Typically you will want to scale the backward transform by 1/N.
+
+   The 'work' pointer should point to an area of N (2*N for complex
+   fft) floats, properly aligned. If 'work' is NULL, then stack will
+   be used instead (this is probably the best strategy for small
+   FFTs, say for N < 16384).
+
+   input and output may alias.
+*/
+void pffft_transform(PFFFT_Setup* setup,
+                     const float* input,
+                     float* output,
+                     float* work,
+                     pffft_direction_t direction);
+
+/*
+   Similar to pffft_transform, but makes sure that the output is
+   ordered as expected (interleaved complex numbers).  This is
+   similar to calling pffft_transform and then pffft_zreorder.
+
+   input and output may alias.
+*/
+void pffft_transform_ordered(PFFFT_Setup* setup,
+                             const float* input,
+                             float* output,
+                             float* work,
+                             pffft_direction_t direction);
+
+/*
+   call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
+   PFFFT_FORWARD) if you want to have the frequency components in
+   the correct "canonical" order, as interleaved complex numbers.
+
+   (for real transforms, both 0-frequency and half frequency
+   components, which are real, are assembled in the first entry as
+   F(0)+i*F(n/2+1). Note that the original fftpack did place
+   F(n/2+1) at the end of the arrays).
+
+   input and output should not alias.
+*/
+void pffft_zreorder(PFFFT_Setup* setup,
+                    const float* input,
+                    float* output,
+                    pffft_direction_t direction);
+
+/*
+   Perform a multiplication of the frequency components of dft_a and
+   dft_b and accumulate them into dft_ab. The arrays should have
+   been obtained with pffft_transform(.., PFFFT_FORWARD) and should
+   *not* have been reordered with pffft_zreorder (otherwise just
+   perform the operation yourself as the dft coefs are stored as
+   interleaved complex numbers).
+
+   the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
+
+   The dft_a, dft_b and dft_ab pointers may alias.
+*/
+void pffft_zconvolve_accumulate(PFFFT_Setup* setup,
+                                const float* dft_a,
+                                const float* dft_b,
+                                float* dft_ab,
+                                float scaling);
+
+/*
+  the float buffers must have the correct alignment (16-byte boundary
+  on intel and powerpc). This function may be used to obtain such
+  correctly aligned buffers.
+*/
+void* pffft_aligned_malloc(size_t nb_bytes);
+void pffft_aligned_free(void*);
+
+/* return 4 or 1 wether support SSE/Altivec instructions was enable when
+ * building pffft.c */
+int pffft_simd_size();

 #ifdef __cplusplus
 }

--- a/third_party/pffft/src/test_pffft.c
+++ b/third_party/pffft/src/test_pffft.c
@@ -361,10 +361,6 @@ void benchmark_ffts(int N, int cplx) {
  pffft_aligned_free(Z);
 }

-#ifndef PFFFT_SIMD_DISABLE
-void validate_pffft_simd(); // a small function inside pffft.c that will detect compiler bugs with respect to simd instruction 
-#endif
-
 int main(int argc, char **argv) {
  int Nvalues[] = { 64, 96, 128, 160, 192, 256, 384, 5*96, 512, 5*128, 3*256, 800, 1024, 2048, 2400, 4096, 8192, 9*1024, 16384, 32768, 256*1024, 1024*1024, -1 };
  int i;