Commit c1086af0 authored by Alessio Bazzica's avatar Alessio Bazzica Committed by Commit Bot

Reland "Add build file for PFFFT"

This is a reland of 8e4cf155

This CL includes the following changes:
1. when fuzzing, PFFFT is compiled disabling SIMD
2. SIMD is also disabled on fuchsia and android because PFFFT only checks __arm__
3. unit test to validate the output of PFFFT (compared against that of FFTPACK)

The first change fixes the problem due to which the original CL has been reverted;
however, it makes fuzzing slower and reduces the coverage since SIMD cannot be used.

Similarly, the second change is a temporary solution to allow landing this CL.
SIMD will be re-enabled in a follow-up CL.

Original change's description:
> Add build file for PFFFT
>
> - fuzzer corpus generator and fuzzer targets
> - fftpack isolated as private test only target (only needed for the benchmark)
>
> Bug: webrtc:9577
> Change-Id: Idc904bc4b05f945a7461a14893518551bbe34b84
> Reviewed-on: https://chromium-review.googlesource.com/c/1452000
> Commit-Queue: Ale Bzk <alessiob@chromium.org>
> Reviewed-by: Nico Weber <thakis@chromium.org>
> Reviewed-by: Olga Sharonova <olka@chromium.org>
> Reviewed-by: Max Moroz <mmoroz@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#629627}

Bug: webrtc:9577
Change-Id: Icfbb4b966c3ad866e9e2970b63363e0e258b1fea
Reviewed-on: https://chromium-review.googlesource.com/c/1458076
Commit-Queue: Ale Bzk <alessiob@chromium.org>
Reviewed-by: default avatarNico Weber <thakis@chromium.org>
Reviewed-by: default avatarMax Moroz <mmoroz@chromium.org>
Reviewed-by: default avatarMax Morin <maxmorin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#630085}
parent 2c3b57ca
......@@ -775,6 +775,13 @@ group("gn_all") {
"//tools/accessibility/inspect:ax_dump_tree",
]
}
# PFFFT.
deps += [
"//third_party/pffft:fuzzers",
"//third_party/pffft:pffft_benchmark",
"//third_party/pffft:pffft_unittest",
]
}
if ((is_linux || is_win) && enable_remoting && !use_ozone) {
......
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import("//testing/libfuzzer/fuzzer_test.gni")
import("//testing/test.gni")
# TODO(https://crbug.com/webrtc/9577): Add architecture specific flags.
config("simd_config") {
# TODO(https://crbug.com/webrtc/9577): Increase fuzzer coverage by using SIMD.
# TODO(https://crbug.com/webrtc/9577): Fix SIMD not detected on android arm64.
# TODO(https://crbug.com/webrtc/9577): Fix SIMD not detected on fuchsia arm64.
if (use_fuzzing_engine || is_android || is_fuchsia) {
cflags = [ "-DPFFFT_SIMD_DISABLE" ]
}
}
static_library("pffft") {
configs += [ ":simd_config" ]
sources = [
"src/pffft.c",
"src/pffft.h",
]
}
# Fuzzing.
group("fuzzers") {
testonly = true
deps = [
":pffft_complex_fuzzer",
":pffft_real_fuzzer",
]
}
fuzzer_testdata_dir = "$target_gen_dir/testdata"
action("generate_pffft_fuzzer_corpus") {
script = "generate_seed_corpus.py"
sources = [
"generate_seed_corpus.py",
]
args = [ rebase_path(fuzzer_testdata_dir, root_build_dir) ]
outputs = [
fuzzer_testdata_dir,
]
}
fuzzer_test("pffft_complex_fuzzer") {
sources = [
"pffft_fuzzer.cc",
]
cflags = [ "-DTRANSFORM_COMPLEX" ]
deps = [
":pffft",
]
seed_corpus = fuzzer_testdata_dir
seed_corpus_deps = [ ":generate_pffft_fuzzer_corpus" ]
}
fuzzer_test("pffft_real_fuzzer") {
sources = [
"pffft_fuzzer.cc",
]
cflags = [ "-DTRANSFORM_REAL" ]
deps = [
":pffft",
]
seed_corpus = fuzzer_testdata_dir
seed_corpus_deps = [ ":generate_pffft_fuzzer_corpus" ]
}
# Unit tests and benchmark.
# This target must be used only for testing and benchmark purposes.
static_library("fftpack") {
testonly = true
sources = [
"src/fftpack.c",
"src/fftpack.h",
]
visibility = [ ":*" ]
}
executable("pffft_benchmark") {
configs += [ ":simd_config" ]
testonly = true
sources = [
"src/test_pffft.c",
]
deps = [
":fftpack",
":pffft",
]
}
test("pffft_unittest") {
sources = [
"pffft_unittest.cc",
]
deps = [
":fftpack",
":pffft",
"//testing/gtest",
"//testing/gtest:gtest_main",
]
}
include_rules = [
"+testing/gtest/include/gtest",
]
......@@ -13,3 +13,6 @@ Description:
PFFFT does 1D Fast Fourier Transforms, of single precision real and complex vectors. It tries do it fast, it tries to be correct, and it tries to be small. Computations do take advantage of SSE1 instructions on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus.
Local Modifications:
* 01-add_m_pi.diff: define M_PI if not defined
* 02-rmv_printf.diff: remove printf and stop including stdio.h
* 03-decl_validate_simd.diff: declare validate_pffft_simd() in pffft.h
#!/usr/bin/env python
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import division
from __future__ import print_function
from array import array
import os
import random
import sys
MAX_INPUT_SIZE = int(1e6)
MAX_FLOAT32 = 3.4028235e+38
def IsValidSize(n):
if n == 0:
return False
# PFFFT only supports transforms for inputs of length N of the form
# N = (2^a)*(3^b)*(5^c) where a >= 5, b >=0, c >= 0.
FACTORS = [2, 3, 5]
factorization = [0, 0, 0]
for i, factor in enumerate(FACTORS):
while n % factor == 0:
n = n // factor
factorization[i] += 1
return factorization[0] >= 5 and n == 1
def WriteFloat32ArrayToFile(file_path, size, generator):
"""Generate an array of float32 values and writes to file."""
with open(file_path, 'wb') as f:
float_array = array('f', [generator() for _ in range(size)])
float_array.tofile(f)
def main():
if len(sys.argv) < 2:
print('Usage: %s <path to output directory>' % sys.argv[0])
sys.exit(1)
output_path = sys.argv[1]
# Create output directory if missing.
if not os.path.exists(output_path):
os.makedirs(output_path)
# List of valid input sizes.
N = [n for n in range(MAX_INPUT_SIZE) if IsValidSize(n)]
# Set the seed to always generate the same random data.
random.seed(0)
# Generate different types of input arrays for each target length.
for n in N:
# Zeros.
WriteFloat32ArrayToFile(
os.path.join(output_path, 'zeros_%d' % n), n, lambda: 0)
# Max float 32.
WriteFloat32ArrayToFile(
os.path.join(output_path, 'max_%d' % n), n, lambda: MAX_FLOAT32)
# Random values in the s16 range.
rnd_s16 = lambda: 32768.0 * 2.0 * (random.random() - 0.5)
WriteFloat32ArrayToFile(
os.path.join(output_path, 'rnd_s16_%d' % n), n, rnd_s16)
sys.exit(0)
if __name__ == '__main__':
main()
diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c
index 0603f2b1f698..7934db448a09 100644
--- a/third_party/pffft/src/pffft.c
+++ b/third_party/pffft/src/pffft.c
@@ -82,6 +82,9 @@
# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__))
#endif
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
/*
vector support macros: the rest of the code is independant of
diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c
index 7934db448a09..2e0c2f651438 100644
--- a/third_party/pffft/src/pffft.c
+++ b/third_party/pffft/src/pffft.c
@@ -59,7 +59,7 @@
#include "pffft.h"
#include <stdlib.h>
-#include <stdio.h>
+// #include <stdio.h>
#include <math.h>
#include <assert.h>
@@ -222,31 +222,35 @@ void validate_pffft_simd() {
memcpy(a3.f, f+12, 4*sizeof(float));
t = a0; u = a1; t.v = VZERO();
- printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0);
+ // printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+ assertv4(t, 0, 0, 0, 0);
t.v = VADD(a1.v, a2.v);
- printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18);
+ // printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+ assertv4(t, 12, 14, 16, 18);
t.v = VMUL(a1.v, a2.v);
- printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
+ // printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+ assertv4(t, 32, 45, 60, 77);
t.v = VMADD(a1.v, a2.v,a0.v);
- printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
+ // printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+ assertv4(t, 32, 46, 62, 80);
INTERLEAVE2(a1.v,a2.v,t.v,u.v);
- printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+ // printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
- printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+ // printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);
t.v=LD_PS1(f[15]);
- printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+ // printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 15, 15, 15, 15);
t.v = VSWAPHL(a1.v, a2.v);
- printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+ // printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 8, 9, 6, 7);
VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
- printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
- a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
- a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
+ // printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
+ // a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
+ // a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
}
#endif //!PFFFT_SIMD_DISABLE
diff --git a/third_party/pffft/src/pffft.h b/third_party/pffft/src/pffft.h
index 2bfa7b3ebcfb..bb6f78d4b795 100644
--- a/third_party/pffft/src/pffft.h
+++ b/third_party/pffft/src/pffft.h
@@ -83,6 +83,11 @@
extern "C" {
#endif
+#ifndef PFFFT_SIMD_DISABLE
+ // Detects compiler bugs with respect to simd instruction.
+ void validate_pffft_simd();
+#endif
+
/* opaque struct holding internal stuff (precomputed twiddle factors)
this struct can be shared by many threads as it contains only
read-only data.
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <algorithm>
#include <array>
#include <cassert>
#include <cstring>
#include "third_party/pffft/src/pffft.h"
namespace {
#if defined(TRANSFORM_REAL)
// Real FFT.
constexpr pffft_transform_t kTransform = PFFFT_REAL;
constexpr size_t kSizeOfOneSample = sizeof(float);
#elif defined(TRANSFORM_COMPLEX)
// Complex FFT.
constexpr pffft_transform_t kTransform = PFFFT_COMPLEX;
constexpr size_t kSizeOfOneSample = 2 * sizeof(float); // Real plus imaginary.
#else
#error FFT transform type not defined.
#endif
bool IsValidSize(size_t n) {
if (n == 0) {
return false;
}
// PFFFT only supports transforms for inputs of length N of the form
// N = (2^a)*(3^b)*(5^c) where a >= 5, b >=0, c >= 0.
constexpr std::array<int, 3> kFactors = {2, 3, 5};
std::array<int, kFactors.size()> factorization{};
for (size_t i = 0; i < kFactors.size(); ++i) {
const int factor = kFactors[i];
while (n % factor == 0) {
n /= factor;
factorization[i]++;
}
}
return factorization[0] >= 5 && n == 1;
}
float* AllocatePffftBuffer(size_t number_of_bytes) {
return static_cast<float*>(pffft_aligned_malloc(number_of_bytes));
}
} // namespace
// Entry point for LibFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
// Set the number of FFT points to use |data| as input vector.
// The latter is truncated if the number of bytes is not an integer
// multiple of the size of one sample (which is either a real or a complex
// floating point number).
const size_t fft_size = size / kSizeOfOneSample;
if (!IsValidSize(fft_size)) {
return 0;
}
const size_t number_of_bytes = fft_size * kSizeOfOneSample;
assert(number_of_bytes <= size);
// Allocate input and output buffers.
float* in = AllocatePffftBuffer(number_of_bytes);
float* out = AllocatePffftBuffer(number_of_bytes);
// Copy input data.
std::memcpy(in, reinterpret_cast<const float*>(data), number_of_bytes);
// Setup FFT.
PFFFT_Setup* pffft_setup = pffft_new_setup(fft_size, kTransform);
// Call different PFFFT functions to maximize the coverage.
pffft_transform(pffft_setup, in, out, nullptr, PFFFT_FORWARD);
pffft_zconvolve_accumulate(pffft_setup, out, out, out, 1.f);
pffft_transform_ordered(pffft_setup, in, out, nullptr, PFFFT_BACKWARD);
// Release memory.
pffft_aligned_free(in);
pffft_aligned_free(out);
pffft_destroy_setup(pffft_setup);
return 0;
}
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <cmath>
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/pffft/src/fftpack.h"
#include "third_party/pffft/src/pffft.h"
#define MAX(x, y) ((x) > (y) ? (x) : (y))
namespace pffft {
namespace test {
namespace {
static constexpr int kFftSizes[] = {
16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5 * 96, 512,
576, 5 * 128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864};
double frand() {
return rand() / (double)RAND_MAX;
}
void PffftValidate(int fft_size, bool complex_fft) {
PFFFT_Setup* pffft_status =
pffft_new_setup(fft_size, complex_fft ? PFFFT_COMPLEX : PFFFT_REAL);
ASSERT_TRUE(pffft_status) << "FFT size (" << fft_size << ") not supported.";
int num_floats = fft_size * (complex_fft ? 2 : 1);
int num_bytes = num_floats * sizeof(float);
float* ref = static_cast<float*>(pffft_aligned_malloc(num_bytes));
float* in = static_cast<float*>(pffft_aligned_malloc(num_bytes));
float* out = static_cast<float*>(pffft_aligned_malloc(num_bytes));
float* tmp = static_cast<float*>(pffft_aligned_malloc(num_bytes));
float* tmp2 = static_cast<float*>(pffft_aligned_malloc(num_bytes));
for (int pass = 0; pass < 2; ++pass) {
SCOPED_TRACE(pass);
float ref_max = 0;
int k;
// Compute reference solution with FFTPACK.
if (pass == 0) {
float* fftpack_buff =
static_cast<float*>(malloc(2 * num_bytes + 15 * sizeof(float)));
for (k = 0; k < num_floats; ++k) {
ref[k] = in[k] = frand() * 2 - 1;
out[k] = 1e30;
}
if (!complex_fft) {
rffti(fft_size, fftpack_buff);
rfftf(fft_size, ref, fftpack_buff);
// Use our ordering for real FFTs instead of the one of fftpack.
{
float refN = ref[fft_size - 1];
for (k = fft_size - 2; k >= 1; --k)
ref[k + 1] = ref[k];
ref[1] = refN;
}
} else {
cffti(fft_size, fftpack_buff);
cfftf(fft_size, ref, fftpack_buff);
}
free(fftpack_buff);
}
for (k = 0; k < num_floats; ++k) {
ref_max = MAX(ref_max, fabs(ref[k]));
}
// Pass 0: non canonical ordering of transform coefficients.
if (pass == 0) {
// Test forward transform, with different input / output.
pffft_transform(pffft_status, in, tmp, 0, PFFFT_FORWARD);
memcpy(tmp2, tmp, num_bytes);
memcpy(tmp, in, num_bytes);
pffft_transform(pffft_status, tmp, tmp, 0, PFFFT_FORWARD);
for (k = 0; k < num_floats; ++k) {
SCOPED_TRACE(k);
EXPECT_EQ(tmp2[k], tmp[k]);
}
// Test reordering.
pffft_zreorder(pffft_status, tmp, out, PFFFT_FORWARD);
pffft_zreorder(pffft_status, out, tmp, PFFFT_BACKWARD);
for (k = 0; k < num_floats; ++k) {
SCOPED_TRACE(k);
EXPECT_EQ(tmp2[k], tmp[k]);
}
pffft_zreorder(pffft_status, tmp, out, PFFFT_FORWARD);
} else {
// Pass 1: canonical ordering of transform coefficients.
pffft_transform_ordered(pffft_status, in, tmp, 0, PFFFT_FORWARD);
memcpy(tmp2, tmp, num_bytes);
memcpy(tmp, in, num_bytes);
pffft_transform_ordered(pffft_status, tmp, tmp, 0, PFFFT_FORWARD);
for (k = 0; k < num_floats; ++k) {
SCOPED_TRACE(k);
EXPECT_EQ(tmp2[k], tmp[k]);
}
memcpy(out, tmp, num_bytes);
}
{
for (k = 0; k < num_floats; ++k) {
SCOPED_TRACE(k);
EXPECT_NEAR(ref[k], out[k], 1e-3 * ref_max) << "Forward FFT mismatch";
}
if (pass == 0) {
pffft_transform(pffft_status, tmp, out, 0, PFFFT_BACKWARD);
} else {
pffft_transform_ordered(pffft_status, tmp, out, 0, PFFFT_BACKWARD);
}
memcpy(tmp2, out, num_bytes);
memcpy(out, tmp, num_bytes);
if (pass == 0) {
pffft_transform(pffft_status, out, out, 0, PFFFT_BACKWARD);
} else {
pffft_transform_ordered(pffft_status, out, out, 0, PFFFT_BACKWARD);
}
for (k = 0; k < num_floats; ++k) {
assert(tmp2[k] == out[k]);
out[k] *= 1.f / fft_size;
}
for (k = 0; k < num_floats; ++k) {
SCOPED_TRACE(k);
EXPECT_NEAR(in[k], out[k], 1e-3 * ref_max) << "Reverse FFT mismatch";
}
}
// Quick test of the circular convolution in FFT domain.
{
float conv_err = 0, conv_max = 0;
pffft_zreorder(pffft_status, ref, tmp, PFFFT_FORWARD);
memset(out, 0, num_bytes);
pffft_zconvolve_accumulate(pffft_status, ref, ref, out, 1.0);
pffft_zreorder(pffft_status, out, tmp2, PFFFT_FORWARD);
for (k = 0; k < num_floats; k += 2) {
float ar = tmp[k], ai = tmp[k + 1];
if (complex_fft || k > 0) {
tmp[k] = ar * ar - ai * ai;
tmp[k + 1] = 2 * ar * ai;
} else {
tmp[0] = ar * ar;
tmp[1] = ai * ai;
}
}
for (k = 0; k < num_floats; ++k) {
float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]);
if (d > conv_err)
conv_err = d;
if (e > conv_max)
conv_max = e;
}
EXPECT_LT(conv_err, 1e-5 * conv_max) << "zconvolve error";
}
}
pffft_destroy_setup(pffft_status);
pffft_aligned_free(ref);
pffft_aligned_free(in);
pffft_aligned_free(out);
pffft_aligned_free(tmp);
pffft_aligned_free(tmp2);
}
} // namespace
TEST(PffftTest, ValidateReal) {
for (int fft_size : kFftSizes) {
SCOPED_TRACE(fft_size);
if (fft_size == 16) {
continue;
}
PffftValidate(fft_size, /*complex_fft=*/false);
}
}
TEST(PffftTest, ValidateComplex) {
for (int fft_size : kFftSizes) {
SCOPED_TRACE(fft_size);
PffftValidate(fft_size, /*complex_fft=*/true);
}
}
} // namespace test
} // namespace pffft
......@@ -59,7 +59,7 @@
#include "pffft.h"
#include <stdlib.h>
#include <stdio.h>
// #include <stdio.h>
#include <math.h>
#include <assert.h>
......@@ -82,6 +82,9 @@
# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__))
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
/*
vector support macros: the rest of the code is independant of
......@@ -219,31 +222,35 @@ void validate_pffft_simd() {
memcpy(a3.f, f+12, 4*sizeof(float));
t = a0; u = a1; t.v = VZERO();
printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0);
// printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 0, 0, 0, 0);
t.v = VADD(a1.v, a2.v);
printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18);
// printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 12, 14, 16, 18);
t.v = VMUL(a1.v, a2.v);
printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
// printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 32, 45, 60, 77);
t.v = VMADD(a1.v, a2.v,a0.v);
printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
// printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 32, 46, 62, 80);
INTERLEAVE2(a1.v,a2.v,t.v,u.v);
printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
// printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
// printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);
t.v=LD_PS1(f[15]);
printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
// printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 15, 15, 15, 15);
t.v = VSWAPHL(a1.v, a2.v);
printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
// printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
assertv4(t, 8, 9, 6, 7);
VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
// printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
// a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
// a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
}
#endif //!PFFFT_SIMD_DISABLE
......
......@@ -83,92 +83,113 @@
extern "C" {
#endif
/* opaque struct holding internal stuff (precomputed twiddle factors)
this struct can be shared by many threads as it contains only
read-only data.
*/
typedef struct PFFFT_Setup PFFFT_Setup;
/* direction of the transform */
typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
/* type of transform */
typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
/*
prepare for performing transforms of size N -- the returned
PFFFT_Setup structure is read-only so it can safely be shared by
multiple concurrent threads.
*/
PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
void pffft_destroy_setup(PFFFT_Setup *);
/*
Perform a Fourier transform , The z-domain data is stored in the
most efficient order for transforming it back, or using it for
convolution. If you need to have its content sorted in the
"usual" way, that is as an array of interleaved complex numbers,
either use pffft_transform_ordered , or call pffft_zreorder after
the forward fft, and before the backward fft.
Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
Typically you will want to scale the backward transform by 1/N.
The 'work' pointer should point to an area of N (2*N for complex
fft) floats, properly aligned. If 'work' is NULL, then stack will
be used instead (this is probably the best strategy for small
FFTs, say for N < 16384).
input and output may alias.
*/
void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
/*
Similar to pffft_transform, but makes sure that the output is
ordered as expected (interleaved complex numbers). This is
similar to calling pffft_transform and then pffft_zreorder.
input and output may alias.
*/
void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
/*
call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
PFFFT_FORWARD) if you want to have the frequency components in
the correct "canonical" order, as interleaved complex numbers.
(for real transforms, both 0-frequency and half frequency
components, which are real, are assembled in the first entry as
F(0)+i*F(n/2+1). Note that the original fftpack did place
F(n/2+1) at the end of the arrays).
input and output should not alias.
*/
void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
/*
Perform a multiplication of the frequency components of dft_a and
dft_b and accumulate them into dft_ab. The arrays should have
been obtained with pffft_transform(.., PFFFT_FORWARD) and should
*not* have been reordered with pffft_zreorder (otherwise just
perform the operation yourself as the dft coefs are stored as
interleaved complex numbers).
the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
The dft_a, dft_b and dft_ab pointers may alias.
*/
void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
/*
the float buffers must have the correct alignment (16-byte boundary
on intel and powerpc). This function may be used to obtain such
correctly aligned buffers.
*/
void *pffft_aligned_malloc(size_t nb_bytes);
void pffft_aligned_free(void *);
/* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */
int pffft_simd_size();
#ifndef PFFFT_SIMD_DISABLE
// Detects compiler bugs with respect to simd instruction.
void validate_pffft_simd();
#endif
/* opaque struct holding internal stuff (precomputed twiddle factors)
this struct can be shared by many threads as it contains only
read-only data.
*/
typedef struct PFFFT_Setup PFFFT_Setup;
/* direction of the transform */
typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
/* type of transform */
typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
/*
prepare for performing transforms of size N -- the returned
PFFFT_Setup structure is read-only so it can safely be shared by
multiple concurrent threads.
*/
PFFFT_Setup* pffft_new_setup(int N, pffft_transform_t transform);
void pffft_destroy_setup(PFFFT_Setup*);
/*
Perform a Fourier transform , The z-domain data is stored in the
most efficient order for transforming it back, or using it for
convolution. If you need to have its content sorted in the
"usual" way, that is as an array of interleaved complex numbers,
either use pffft_transform_ordered , or call pffft_zreorder after
the forward fft, and before the backward fft.
Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
Typically you will want to scale the backward transform by 1/N.
The 'work' pointer should point to an area of N (2*N for complex
fft) floats, properly aligned. If 'work' is NULL, then stack will
be used instead (this is probably the best strategy for small
FFTs, say for N < 16384).
input and output may alias.
*/
void pffft_transform(PFFFT_Setup* setup,
const float* input,
float* output,
float* work,
pffft_direction_t direction);
/*
Similar to pffft_transform, but makes sure that the output is
ordered as expected (interleaved complex numbers). This is
similar to calling pffft_transform and then pffft_zreorder.
input and output may alias.
*/
void pffft_transform_ordered(PFFFT_Setup* setup,
const float* input,
float* output,
float* work,
pffft_direction_t direction);
/*
call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
PFFFT_FORWARD) if you want to have the frequency components in
the correct "canonical" order, as interleaved complex numbers.
(for real transforms, both 0-frequency and half frequency
components, which are real, are assembled in the first entry as
F(0)+i*F(n/2+1). Note that the original fftpack did place
F(n/2+1) at the end of the arrays).
input and output should not alias.
*/
void pffft_zreorder(PFFFT_Setup* setup,
const float* input,
float* output,
pffft_direction_t direction);
/*
Perform a multiplication of the frequency components of dft_a and
dft_b and accumulate them into dft_ab. The arrays should have
been obtained with pffft_transform(.., PFFFT_FORWARD) and should
*not* have been reordered with pffft_zreorder (otherwise just
perform the operation yourself as the dft coefs are stored as
interleaved complex numbers).
the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
The dft_a, dft_b and dft_ab pointers may alias.
*/
void pffft_zconvolve_accumulate(PFFFT_Setup* setup,
const float* dft_a,
const float* dft_b,
float* dft_ab,
float scaling);
/*
the float buffers must have the correct alignment (16-byte boundary
on intel and powerpc). This function may be used to obtain such
correctly aligned buffers.
*/
void* pffft_aligned_malloc(size_t nb_bytes);
void pffft_aligned_free(void*);
/* return 4 or 1 wether support SSE/Altivec instructions was enable when
* building pffft.c */
int pffft_simd_size();
#ifdef __cplusplus
}
......
......@@ -361,10 +361,6 @@ void benchmark_ffts(int N, int cplx) {
pffft_aligned_free(Z);
}
#ifndef PFFFT_SIMD_DISABLE
void validate_pffft_simd(); // a small function inside pffft.c that will detect compiler bugs with respect to simd instruction
#endif
int main(int argc, char **argv) {
int Nvalues[] = { 64, 96, 128, 160, 192, 256, 384, 5*96, 512, 5*128, 3*256, 800, 1024, 2048, 2400, 4096, 8192, 9*1024, 16384, 32768, 256*1024, 1024*1024, -1 };
int i;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment