Commit 1463ca03 authored by rtoy@chromium.org

Flush denormals to zero on arm and arm64.

We also slightly refactor the code to make it a little easier to read.

The impact of denormals on arm appears to be much less pronounced than
on x86, but flushing denormals will make arm and x86 results more similar.

This was tested manually by creating a test to check the response of a
lowpass filter to a very small impulse. All values after a certain
point should be zero when denormals are flushed to zero. If denormals
are not flushed, the non-zero output would last much longer.

Verified on Linux, Windows, OSX, and Android that denormals are flushed.

The test is not included; ensuring that denormals are flushed does not
seem important enough to warrant a permanent test.

BUG=395254

Review URL: https://codereview.chromium.org/402803003

git-svn-id: svn://svn.chromium.org/blink/trunk@178620 bbb929c8-8fbe-4397-9dbb-9b2b20218538
parent 7584e70e
...@@ -34,11 +34,18 @@ namespace blink { ...@@ -34,11 +34,18 @@ namespace blink {
// Deal with denormals. They can very seriously impact performance on x86. // Deal with denormals. They can very seriously impact performance on x86.
// Define HAVE_DENORMAL if we support flushing denormals to zero. // Define HAVE_DENORMAL if we support flushing denormals to zero.
#if OS(WIN) && COMPILER(MSVC) #if OS(WIN) && COMPILER(MSVC)
// Windows compiled using MSVC with SSE2
#define HAVE_DENORMAL 1 #define HAVE_DENORMAL 1
#endif #endif
#if COMPILER(GCC) && (CPU(X86) || CPU(X86_64)) #if COMPILER(GCC) && (CPU(X86) || CPU(X86_64))
// X86 chips can flush denormals
#define HAVE_DENORMAL 1
#endif
#if CPU(ARM) || CPU(ARM64)
#define HAVE_DENORMAL 1 #define HAVE_DENORMAL 1
#endif #endif
...@@ -48,43 +55,34 @@ public: ...@@ -48,43 +55,34 @@ public:
DenormalDisabler() DenormalDisabler()
: m_savedCSR(0) : m_savedCSR(0)
{ {
#if OS(WIN) && COMPILER(MSVC) disableDenormals();
// Save the current state, and set mode to flush denormals.
//
// http://stackoverflow.com/questions/637175/possible-bug-in-controlfp-s-may-not-restore-control-word-correctly
_controlfp_s(&m_savedCSR, 0, 0);
unsigned unused;
_controlfp_s(&unused, _DN_FLUSH, _MCW_DN);
#else
m_savedCSR = getCSR();
setCSR(m_savedCSR | 0x8040);
#endif
} }
~DenormalDisabler() ~DenormalDisabler()
{ {
#if OS(WIN) && COMPILER(MSVC) restoreState();
unsigned unused;
_controlfp_s(&unused, m_savedCSR, _MCW_DN);
#else
setCSR(m_savedCSR);
#endif
} }
// This is a nop if we can flush denormals to zero in hardware. // This is a nop if we can flush denormals to zero in hardware.
static inline float flushDenormalFloatToZero(float f) static inline float flushDenormalFloatToZero(float f)
{ {
#if OS(WIN) && COMPILER(MSVC) && (!_M_IX86_FP)
// For systems using x87 instead of sse, there's no hardware support
// to flush denormals automatically. Hence, we need to flush
// denormals to zero manually.
return (fabs(f) < FLT_MIN) ? 0.0f : f;
#else
return f; return f;
#endif
} }
private: private:
unsigned m_savedCSR;
#if COMPILER(GCC) && (CPU(X86) || CPU(X86_64)) #if COMPILER(GCC) && (CPU(X86) || CPU(X86_64))
// Records the current control/status word in m_savedCSR, then rewrites the
// register with the denormal-flushing bits switched on.
inline void disableDenormals()
{
    // 0x8000 is flush-to-zero (bit 15) and 0x0040 is denormals-are-zero
    // (bit 6) in MXCSR; OR-ed together they give the 0x8040 mask.
    const int flushMask = 0x8000 | 0x0040;

    // Save first so restoreState() can put the original word back.
    m_savedCSR = getCSR();
    setCSR(m_savedCSR | flushMask);
}
// Puts the control/status register back to the value held in m_savedCSR.
inline void restoreState()
{
// m_savedCSR is filled in by disableDenormals() before it sets the flush bits.
setCSR(m_savedCSR);
}
inline int getCSR() inline int getCSR()
{ {
int result; int result;
...@@ -98,9 +96,57 @@ private: ...@@ -98,9 +96,57 @@ private:
asm volatile("ldmxcsr %0" : : "m" (temp)); asm volatile("ldmxcsr %0" : : "m" (temp));
} }
#elif OS(WIN) && COMPILER(MSVC)
// Captures the current floating-point control word in m_savedCSR and then
// selects flush-to-zero handling for denormals.
inline void disableDenormals()
{
    // Query only: a zero mask changes nothing and just reports the current
    // control word. The save is done as a separate call because of:
    //
    // http://stackoverflow.com/questions/637175/possible-bug-in-controlfp-s-may-not-restore-control-word-correctly
    _controlfp_s(&m_savedCSR, 0, 0);

    // Change only the denormal-control field; the reported word is not needed.
    unsigned ignored;
    _controlfp_s(&ignored, _DN_FLUSH, _MCW_DN);
}
// Restores the denormal-control field from the word saved in m_savedCSR.
inline void restoreState()
{
    // _controlfp_s always reports the resulting control word; it is not used here.
    unsigned ignored;
    _controlfp_s(&ignored, m_savedCSR, _MCW_DN);
}
#elif CPU(ARM) || CPU(ARM64)
// Saves the current floating-point control word in m_savedCSR, then writes it
// back with flush-to-zero mode enabled.
inline void disableDenormals()
{
    // Bit 24 is the flush-to-zero mode control bit. Setting it to 1 flushes denormals to 0.
    const int flushToZeroBit = 1 << 24;

    // Save first so restoreState() can put the original word back.
    m_savedCSR = getStatusWord();
    setStatusWord(m_savedCSR | flushToZeroBit);
}
// Writes the word held in m_savedCSR back to the floating-point control register.
inline void restoreState()
{
// m_savedCSR holds the word read by disableDenormals() before bit 24 was set.
setStatusWord(m_savedCSR);
}
// Reads the floating-point control word: FPCR on ARM64, FPSCR on 32-bit ARM.
// Returns the low 32 bits of the register as an int.
inline int getStatusWord()
{
#if CPU(ARM64)
    // MRS can only target a 64-bit X register. Binding a 32-bit int to the
    // "=r" operand makes the compiler print a W register, which the assembler
    // rejects, so read into a 64-bit local and narrow afterwards.
    unsigned long long wideResult;
    asm volatile("mrs %[result], FPCR" : [result] "=r" (wideResult));
    return static_cast<int>(wideResult);
#else
    int result;
    asm volatile("vmrs %[result], FPSCR" : [result] "=r" (result));
    return result;
#endif
}
// Writes |a| to the floating-point control word: FPCR on ARM64, FPSCR on
// 32-bit ARM.
inline void setStatusWord(int a)
{
#if CPU(ARM64)
    // MSR can only take a 64-bit X register as its source. Widen through
    // unsigned so the upper 32 bits written to FPCR are zero rather than a
    // sign extension or leftover register contents.
    const unsigned long long wideSrc = static_cast<unsigned>(a);
    asm volatile("msr FPCR, %[src]" : : [src] "r" (wideSrc));
#else
    asm volatile("vmsr FPSCR, %[src]" : : [src] "r" (a));
#endif
}
#endif #endif
unsigned m_savedCSR;
}; };
#else #else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment