Commit 6ff108b6 authored by Dan Sanders, committed by Commit Bot

[media] Flush denormals in SSE2 WSOLA.

When playing silence, many samples are small enough that squaring them
results in a denormal float. On some AMD CPUs this results in significantly
reduced performance.

This patch configures the SSE floating point environment to flush
denormal results to zero and to treat denormal inputs as zero.

Bug: 531026
Cq-Include-Trybots: luci.chromium.try:android_optional_gpu_tests_rel;luci.chromium.try:linux_optional_gpu_tests_rel;luci.chromium.try:mac_optional_gpu_tests_rel;luci.chromium.try:win_optional_gpu_tests_rel
Change-Id: If3842a22f6dc9bef76d87b0a6db069c3f9234d19
Reviewed-on: https://chromium-review.googlesource.com/1179361
Commit-Queue: Dan Sanders <sandersd@chromium.org>
Reviewed-by: Dale Curtis <dalecurtis@chromium.org>
Reviewed-by: Robert Flack <flackr@chromium.org>
Cr-Commit-Position: refs/heads/master@{#584220}
parent 88bf4fca
# Do NOT add net/ or ui/base without a great reason, they're huge! # Do NOT add net/ or ui/base without a great reason, they're huge!
include_rules = [ include_rules = [
"+cc/base/math_util.h",
"+cc/paint", "+cc/paint",
"+chromeos/audio", "+chromeos/audio",
"+crypto", "+crypto",
......
...@@ -80,6 +80,7 @@ ...@@ -80,6 +80,7 @@
#include "base/logging.h" #include "base/logging.h"
#include "base/numerics/math_constants.h" #include "base/numerics/math_constants.h"
#include "build/build_config.h" #include "build/build_config.h"
#include "cc/base/math_util.h"
#if defined(ARCH_CPU_X86_FAMILY) #if defined(ARCH_CPU_X86_FAMILY)
#include <xmmintrin.h> #include <xmmintrin.h>
...@@ -237,37 +238,43 @@ void SincResampler::Resample(int frames, float* destination) { ...@@ -237,37 +238,43 @@ void SincResampler::Resample(int frames, float* destination) {
// Step (2) -- Resample! // Step (2) -- Resample!
while (remaining_frames) { while (remaining_frames) {
while (virtual_source_idx_ < block_size_) { // Silent audio can contain non-zero samples small enough to result in
// |virtual_source_idx_| lies in between two kernel offsets so figure out // subnormals internally. Disabling subnormals can be significantly faster.
// what they are. {
const int source_idx = static_cast<int>(virtual_source_idx_); cc::ScopedSubnormalFloatDisabler disable_subnormals;
const double virtual_offset_idx =
(virtual_source_idx_ - source_idx) * kKernelOffsetCount; while (virtual_source_idx_ < block_size_) {
const int offset_idx = static_cast<int>(virtual_offset_idx); // |virtual_source_idx_| lies in between two kernel offsets so figure
// out what they are.
// We'll compute "convolutions" for the two kernels which straddle const int source_idx = static_cast<int>(virtual_source_idx_);
// |virtual_source_idx_|. const double virtual_offset_idx =
const float* k1 = kernel_storage_.get() + offset_idx * kKernelSize; (virtual_source_idx_ - source_idx) * kKernelOffsetCount;
const float* k2 = k1 + kKernelSize; const int offset_idx = static_cast<int>(virtual_offset_idx);
// Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be // We'll compute "convolutions" for the two kernels which straddle
// true so long as kKernelSize is a multiple of 16. // |virtual_source_idx_|.
DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F); const float* k1 = kernel_storage_.get() + offset_idx * kKernelSize;
DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F); const float* k2 = k1 + kKernelSize;
// Initialize input pointer based on quantized |virtual_source_idx_|. // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always
const float* input_ptr = r1_ + source_idx; // be true so long as kKernelSize is a multiple of 16.
DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);
// Figure out how much to weight each kernel's "convolution". DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);
const double kernel_interpolation_factor =
virtual_offset_idx - offset_idx; // Initialize input pointer based on quantized |virtual_source_idx_|.
*destination++ = const float* input_ptr = r1_ + source_idx;
CONVOLVE_FUNC(input_ptr, k1, k2, kernel_interpolation_factor);
// Figure out how much to weight each kernel's "convolution".
// Advance the virtual index. const double kernel_interpolation_factor =
virtual_source_idx_ += io_sample_rate_ratio_; virtual_offset_idx - offset_idx;
if (!--remaining_frames) *destination++ =
return; CONVOLVE_FUNC(input_ptr, k1, k2, kernel_interpolation_factor);
// Advance the virtual index.
virtual_source_idx_ += io_sample_rate_ratio_;
if (!--remaining_frames)
return;
}
} }
// Wrap back around to the start. // Wrap back around to the start.
......
...@@ -98,6 +98,7 @@ jumbo_source_set("filters") { ...@@ -98,6 +98,7 @@ jumbo_source_set("filters") {
] ]
deps = [ deps = [
"//cc/base", # For MathUtil.
"//gpu/command_buffer/common", "//gpu/command_buffer/common",
"//media/base", "//media/base",
"//media/cdm", "//media/cdm",
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <cmath> #include <cmath>
#include "base/logging.h" #include "base/logging.h"
#include "cc/base/math_util.h"
#include "media/base/audio_bus.h" #include "media/base/audio_bus.h"
#include "media/base/limits.h" #include "media/base/limits.h"
#include "media/filters/wsola_internals.h" #include "media/filters/wsola_internals.h"
...@@ -192,6 +193,11 @@ int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, ...@@ -192,6 +193,11 @@ int AudioRendererAlgorithm::FillBuffer(AudioBus* dest,
CreateSearchWrappers(); CreateSearchWrappers();
} }
// Silent audio can contain non-zero samples small enough to result in
// subnormals internally. Disabling subnormals can be significantly faster in
// these cases.
cc::ScopedSubnormalFloatDisabler disable_subnormals;
int rendered_frames = 0; int rendered_frames = 0;
do { do {
rendered_frames += rendered_frames +=
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "base/logging.h" #include "base/logging.h"
#include "base/numerics/math_constants.h" #include "base/numerics/math_constants.h"
#include "build/build_config.h"
#include "media/base/audio_bus.h" #include "media/base/audio_bus.h"
#if defined(ARCH_CPU_X86_FAMILY) #if defined(ARCH_CPU_X86_FAMILY)
......
# Do NOT add net/ or ui/base without a great reason, they're huge! # Do NOT add net/ or ui/base without a great reason, they're huge!
include_rules = [ include_rules = [
"+cc/base/math_util.h",
"+components/viz/client", "+components/viz/client",
"+components/viz/common", "+components/viz/common",
"+third_party/khronos/GLES2", "+third_party/khronos/GLES2",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment