Commit 6ff108b6 authored by Dan Sanders, committed by Commit Bot

[media] Flush denormals in SSE2 WSOLA.

When playing silence, many samples are small enough that squaring them
results in a denormal float. On some AMD CPUs this results in significantly
reduced performance.

This patch configures the SSE floating point environment to flush
denormals and to treat denormals as zero.

Bug: 531026
Cq-Include-Trybots: luci.chromium.try:android_optional_gpu_tests_rel;luci.chromium.try:linux_optional_gpu_tests_rel;luci.chromium.try:mac_optional_gpu_tests_rel;luci.chromium.try:win_optional_gpu_tests_rel
Change-Id: If3842a22f6dc9bef76d87b0a6db069c3f9234d19
Reviewed-on: https://chromium-review.googlesource.com/1179361
Commit-Queue: Dan Sanders <sandersd@chromium.org>
Reviewed-by: Dale Curtis <dalecurtis@chromium.org>
Reviewed-by: Robert Flack <flackr@chromium.org>
Cr-Commit-Position: refs/heads/master@{#584220}
parent 88bf4fca
# Do NOT add net/ or ui/base without a great reason, they're huge! # Do NOT add net/ or ui/base without a great reason, they're huge!
include_rules = [ include_rules = [
"+cc/base/math_util.h",
"+cc/paint", "+cc/paint",
"+chromeos/audio", "+chromeos/audio",
"+crypto", "+crypto",
......
...@@ -80,6 +80,7 @@ ...@@ -80,6 +80,7 @@
#include "base/logging.h" #include "base/logging.h"
#include "base/numerics/math_constants.h" #include "base/numerics/math_constants.h"
#include "build/build_config.h" #include "build/build_config.h"
#include "cc/base/math_util.h"
#if defined(ARCH_CPU_X86_FAMILY) #if defined(ARCH_CPU_X86_FAMILY)
#include <xmmintrin.h> #include <xmmintrin.h>
...@@ -237,9 +238,14 @@ void SincResampler::Resample(int frames, float* destination) { ...@@ -237,9 +238,14 @@ void SincResampler::Resample(int frames, float* destination) {
// Step (2) -- Resample! // Step (2) -- Resample!
while (remaining_frames) { while (remaining_frames) {
// Silent audio can contain non-zero samples small enough to result in
// subnormals internally. Disabling subnormals can be significantly faster.
{
cc::ScopedSubnormalFloatDisabler disable_subnormals;
while (virtual_source_idx_ < block_size_) { while (virtual_source_idx_ < block_size_) {
// |virtual_source_idx_| lies in between two kernel offsets so figure out // |virtual_source_idx_| lies in between two kernel offsets so figure
// what they are. // out what they are.
const int source_idx = static_cast<int>(virtual_source_idx_); const int source_idx = static_cast<int>(virtual_source_idx_);
const double virtual_offset_idx = const double virtual_offset_idx =
(virtual_source_idx_ - source_idx) * kKernelOffsetCount; (virtual_source_idx_ - source_idx) * kKernelOffsetCount;
...@@ -250,8 +256,8 @@ void SincResampler::Resample(int frames, float* destination) { ...@@ -250,8 +256,8 @@ void SincResampler::Resample(int frames, float* destination) {
const float* k1 = kernel_storage_.get() + offset_idx * kKernelSize; const float* k1 = kernel_storage_.get() + offset_idx * kKernelSize;
const float* k2 = k1 + kKernelSize; const float* k2 = k1 + kKernelSize;
// Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always
// true so long as kKernelSize is a multiple of 16. // be true so long as kKernelSize is a multiple of 16.
DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F); DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);
DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F); DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);
...@@ -269,6 +275,7 @@ void SincResampler::Resample(int frames, float* destination) { ...@@ -269,6 +275,7 @@ void SincResampler::Resample(int frames, float* destination) {
if (!--remaining_frames) if (!--remaining_frames)
return; return;
} }
}
// Wrap back around to the start. // Wrap back around to the start.
DCHECK_GE(virtual_source_idx_, block_size_); DCHECK_GE(virtual_source_idx_, block_size_);
......
...@@ -98,6 +98,7 @@ jumbo_source_set("filters") { ...@@ -98,6 +98,7 @@ jumbo_source_set("filters") {
] ]
deps = [ deps = [
"//cc/base", # For MathUtil.
"//gpu/command_buffer/common", "//gpu/command_buffer/common",
"//media/base", "//media/base",
"//media/cdm", "//media/cdm",
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <cmath> #include <cmath>
#include "base/logging.h" #include "base/logging.h"
#include "cc/base/math_util.h"
#include "media/base/audio_bus.h" #include "media/base/audio_bus.h"
#include "media/base/limits.h" #include "media/base/limits.h"
#include "media/filters/wsola_internals.h" #include "media/filters/wsola_internals.h"
...@@ -192,6 +193,11 @@ int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, ...@@ -192,6 +193,11 @@ int AudioRendererAlgorithm::FillBuffer(AudioBus* dest,
CreateSearchWrappers(); CreateSearchWrappers();
} }
// Silent audio can contain non-zero samples small enough to result in
// subnormals internally. Disabling subnormals can be significantly faster in
// these cases.
cc::ScopedSubnormalFloatDisabler disable_subnormals;
int rendered_frames = 0; int rendered_frames = 0;
do { do {
rendered_frames += rendered_frames +=
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "base/logging.h" #include "base/logging.h"
#include "base/numerics/math_constants.h" #include "base/numerics/math_constants.h"
#include "build/build_config.h"
#include "media/base/audio_bus.h" #include "media/base/audio_bus.h"
#if defined(ARCH_CPU_X86_FAMILY) #if defined(ARCH_CPU_X86_FAMILY)
......
# Do NOT add net/ or ui/base without a great reason, they're huge! # Do NOT add net/ or ui/base without a great reason, they're huge!
include_rules = [ include_rules = [
"+cc/base/math_util.h",
"+components/viz/client", "+components/viz/client",
"+components/viz/common", "+components/viz/common",
"+third_party/khronos/GLES2", "+third_party/khronos/GLES2",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment