Commit a15cf3ca authored by Justin Schuh, committed by Commit Bot

Add optimized negation to ClampedNumeric

Integrates the GCC-style subtract-with-carry intrinsics for Intel, etc., and
the saturating instructions on ARM.

Bug: 672489
Change-Id: I5db9d94224ab9c87fb025e0150879558a64f238f
Reviewed-on: https://chromium-review.googlesource.com/571404
Commit-Queue: Justin Schuh <jschuh@chromium.org>
Reviewed-by: Wez <wez@chromium.org>
Cr-Commit-Position: refs/heads/master@{#486929}
parent cfbb49e3
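
For context on what the optimized paths below must preserve: clamped (saturating) negation behaves like ordinary two's complement negation, except that negating the most negative value yields the maximum value instead of wrapping back to itself, and negating any unsigned value clamps to zero. A minimal portable sketch of those semantics for signed integer types follows; it is not the commit's optimized code, and ReferenceClampedNegate is a hypothetical name used only for illustration.

#include <limits>
#include <type_traits>

// Reference semantics only: what the intrinsic-backed fast paths added below
// are expected to compute for signed integer types.
template <typename T>
constexpr T ReferenceClampedNegate(T value) {
  static_assert(std::is_integral<T>::value && std::is_signed<T>::value,
                "signed integer types only");
  // -lowest() overflows in two's complement, so clamp that one case to max().
  return value != std::numeric_limits<T>::lowest()
             ? static_cast<T>(-value)
             : std::numeric_limits<T>::max();
}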
@@ -80,12 +80,16 @@ class ClampedNumeric {
   constexpr ClampedNumeric operator-() const {
     return ClampedNumeric<T>(
         // The negation of two's complement int min is int min, so that's the
-        // only overflow case we have to check for.
+        // only overflow case we have to check for. And in the case of a
+        // run-time variable value_, we can use an optimized code path.
         std::is_signed<T>::value
-            ? ((std::is_floating_point<T>::value ||
-                NegateWrapper(value_) != std::numeric_limits<T>::lowest())
-                   ? NegateWrapper(value_)
-                   : std::numeric_limits<T>::max())
+            ? (IsCompileTimeConstant(value_)
+                   ? ((std::is_floating_point<T>::value ||
+                       NegateWrapper(value_) !=
+                           std::numeric_limits<T>::lowest())
+                          ? NegateWrapper(value_)
+                          : std::numeric_limits<T>::max())
+                   : ClampedSubOp<T, T>::template Do<T>(T(0), value_))
             : T(0));  // Clamped unsigned negation is always zero.
   }
......
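
The hunk above keys its behavior on IsCompileTimeConstant(value_): constant operands stay on the fully constexpr clamping expression, while run-time operands are routed to the optimized ClampedSubOp path. In Chromium's numerics headers a helper of this kind is typically built on the GCC/Clang __builtin_constant_p intrinsic; the sketch below assumes that, and IsCompileTimeConstantSketch is an illustrative name rather than the commit's actual helper.

// GCC/Clang only: true when the compiler can see `v` as a constant at this
// call site, so constant folding keeps the constexpr branch while run-time
// values fall through to the intrinsic-backed subtraction path.
template <typename T>
constexpr bool IsCompileTimeConstantSketch(const T v) {
  return __builtin_constant_p(v);
}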
@@ -126,6 +126,50 @@ struct ClampedAddFastOp {
   }
 };
 
+// This is the fastest negation on Intel, and a decent fallback on arm.
+__attribute__((always_inline)) inline int8_t ClampedNegate(uint8_t value) {
+  uint8_t carry;
+  return __builtin_subcb(0, value, 0, &carry) + carry;
+}
+
+__attribute__((always_inline)) inline int8_t ClampedNegate(int8_t value) {
+  return ClampedNegate(static_cast<uint8_t>(value));
+}
+
+__attribute__((always_inline)) inline int16_t ClampedNegate(uint16_t value) {
+  uint16_t carry;
+  return __builtin_subcs(0, value, 0, &carry) + carry;
+}
+
+__attribute__((always_inline)) inline int16_t ClampedNegate(int16_t value) {
+  return ClampedNegate(static_cast<uint16_t>(value));
+}
+
+__attribute__((always_inline)) inline int32_t ClampedNegate(uint32_t value) {
+  uint32_t carry;
+  return __builtin_subc(0, value, 0, &carry) + carry;
+}
+
+__attribute__((always_inline)) inline int32_t ClampedNegate(int32_t value) {
+  return ClampedNegate(static_cast<uint32_t>(value));
+}
+
+// These are the LP64 platforms minus Mac (because Xcode blows up otherwise).
+#if !defined(__APPLE__) && defined(__LP64__) && __LP64__
+__attribute__((always_inline)) inline int64_t ClampedNegate(uint64_t value) {
+  uint64_t carry;
+  return __builtin_subcl(0, value, 0, &carry) + carry;
+}
+#else  // Mac, Windows, and any IL32 platforms.
+__attribute__((always_inline)) inline int64_t ClampedNegate(uint64_t value) {
+  uint64_t carry;
+  return __builtin_subcll(0, value, 0, &carry) + carry;
+}
+#endif
+__attribute__((always_inline)) inline int64_t ClampedNegate(int64_t value) {
+  return ClampedNegate(static_cast<uint64_t>(value));
+}
+
 template <typename T, typename U>
 struct ClampedSubFastOp {
   static const bool is_supported = true;
@@ -136,6 +180,17 @@ struct ClampedSubFastOp {
       return ClampedSubFastAsmOp<T, U>::template Do<V>(x, y);
     }
 
+    // Fast path for generic clamped negation.
+    if (std::is_same<T, U>::value && std::is_same<U, V>::value &&
+        IsCompileTimeConstant(x) && x == 0 && !IsCompileTimeConstant(y)) {
+      // We use IntegerForDigitsAndSign<> to convert the type to a uint*_t,
+      // otherwise Xcode can't resolve to the standard integral types correctly.
+      return ClampedNegate(
+          static_cast<typename IntegerForDigitsAndSign<
+              IntegerBitsPlusSign<T>::value, std::is_signed<T>::value>::type>(
+              y));
+    }
+
     V result;
     return !__builtin_sub_overflow(x, y, &result)
                ? result
......
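
The ClampedNegate overloads above lean on the Clang/GCC-style multiprecision carry builtins (__builtin_subcb, __builtin_subcs, __builtin_subc, __builtin_subcl, __builtin_subcll). Per Clang's documentation these compute x - y - carryin at the operand width and report the borrow through *carryout, and their usual job is chaining wide subtractions limb by limb. The sketch below only illustrates that chaining behavior; Sub16ViaLimbs is a hypothetical helper, not part of the commit.

#include <stdint.h>

// Subtract two 16-bit values using two 8-bit limbs, propagating the borrow
// from the low limb into the high limb through the builtin's carry argument.
static inline uint16_t Sub16ViaLimbs(uint16_t a, uint16_t b) {
  uint8_t borrow = 0;
  const uint8_t lo = __builtin_subcb(static_cast<uint8_t>(a),
                                     static_cast<uint8_t>(b), 0, &borrow);
  const uint8_t hi = __builtin_subcb(static_cast<uint8_t>(a >> 8),
                                     static_cast<uint8_t>(b >> 8), borrow,
                                     &borrow);
  return static_cast<uint16_t>(lo) | (static_cast<uint16_t>(hi) << 8);
}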
@@ -648,6 +648,26 @@ static void TestArithmetic(const char* dst, int line) {
     TEST_EXPECTED_VALUE(1, -ClampedNumeric<Dst>(-1));
     TEST_EXPECTED_VALUE(static_cast<Dst>(DstLimits::max() * -1),
                         -ClampedNumeric<Dst>(DstLimits::max()));
+
+    // The runtime paths for saturated negation differ significantly from what
+    // gets evaluated at compile-time. Making this test volatile forces the
+    // compiler to generate code rather than fold constant expressions.
+    volatile Dst value = Dst(0);
+    TEST_EXPECTED_VALUE(0, -MakeClampedNum(value));
+    value = Dst(1);
+    TEST_EXPECTED_VALUE(-1, -MakeClampedNum(value));
+    value = Dst(2);
+    TEST_EXPECTED_VALUE(-2, -MakeClampedNum(value));
+    value = Dst(-1);
+    TEST_EXPECTED_VALUE(1, -MakeClampedNum(value));
+    value = Dst(-2);
+    TEST_EXPECTED_VALUE(2, -MakeClampedNum(value));
+    value = DstLimits::max();
+    TEST_EXPECTED_VALUE(Dst(DstLimits::max() * -1), -MakeClampedNum(value));
+    value = Dst(-1 * DstLimits::max());
+    TEST_EXPECTED_VALUE(DstLimits::max(), -MakeClampedNum(value));
+    value = DstLimits::lowest();
+    TEST_EXPECTED_VALUE(DstLimits::max(), -MakeClampedNum(value));
   }
 
   // Generic absolute value.
......