Commit a15cf3ca authored by Justin Schuh, committed by Commit Bot

Add optimized negation to ClampedNumeric

Integrates the GCC-style subtract-with-carry intrinsics for Intel, etc., and
the saturating instructions on ARM.

Bug: 672489
Change-Id: I5db9d94224ab9c87fb025e0150879558a64f238f
Reviewed-on: https://chromium-review.googlesource.com/571404
Commit-Queue: Justin Schuh <jschuh@chromium.org>
Reviewed-by: Wez <wez@chromium.org>
Cr-Commit-Position: refs/heads/master@{#486929}
parent cfbb49e3
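
For context on what the optimized paths below must preserve: clamped (saturating) negation behaves like ordinary two's complement negation, except that negating the most negative value yields the maximum value instead of wrapping back to itself, and negating any unsigned value clamps to zero. A minimal portable sketch of those semantics for signed integer types follows; it is not the commit's optimized code, and ReferenceClampedNegate is a hypothetical name used only for illustration.

#include <limits>
#include <type_traits>

// Reference semantics only: what the intrinsic-backed fast paths added below
// are expected to compute for signed integer types.
template <typename T>
constexpr T ReferenceClampedNegate(T value) {
  static_assert(std::is_integral<T>::value && std::is_signed<T>::value,
                "signed integer types only");
  // -lowest() overflows in two's complement, so clamp that one case to max().
  return value != std::numeric_limits<T>::lowest()
             ? static_cast<T>(-value)
             : std::numeric_limits<T>::max();
}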
@@ -80,12 +80,16 @@ class ClampedNumeric {
   constexpr ClampedNumeric operator-() const {
     return ClampedNumeric<T>(
         // The negation of two's complement int min is int min, so that's the
-        // only overflow case we have to check for.
+        // only overflow case we have to check for. And in the case of a
+        // run-time variable value_, we can use an optimized code path.
         std::is_signed<T>::value
-            ? ((std::is_floating_point<T>::value ||
-                NegateWrapper(value_) != std::numeric_limits<T>::lowest())
-                   ? NegateWrapper(value_)
-                   : std::numeric_limits<T>::max())
+            ? (IsCompileTimeConstant(value_)
+                   ? ((std::is_floating_point<T>::value ||
+                       NegateWrapper(value_) !=
+                           std::numeric_limits<T>::lowest())
+                          ? NegateWrapper(value_)
+                          : std::numeric_limits<T>::max())
+                   : ClampedSubOp<T, T>::template Do<T>(T(0), value_))
             : T(0));  // Clamped unsigned negation is always zero.
   }
......
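
The hunk above keys its behavior on IsCompileTimeConstant(value_): constant operands stay on the fully constexpr clamping expression, while run-time operands are routed to the optimized ClampedSubOp path. In Chromium's numerics headers a helper of this kind is typically built on the GCC/Clang __builtin_constant_p intrinsic; the sketch below assumes that, and IsCompileTimeConstantSketch is an illustrative name rather than the commit's actual helper.

// GCC/Clang only: true when the compiler can see `v` as a constant at this
// call site, so constant folding keeps the constexpr branch while run-time
// values fall through to the intrinsic-backed subtraction path.
template <typename T>
constexpr bool IsCompileTimeConstantSketch(const T v) {
  return __builtin_constant_p(v);
}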
@@ -126,6 +126,50 @@ struct ClampedAddFastOp {
   }
 };
 
+// This is the fastest negation on Intel, and a decent fallback on arm.
+__attribute__((always_inline)) inline int8_t ClampedNegate(uint8_t value) {
+  uint8_t carry;
+  return __builtin_subcb(0, value, 0, &carry) + carry;
+}
+
+__attribute__((always_inline)) inline int8_t ClampedNegate(int8_t value) {
+  return ClampedNegate(static_cast<uint8_t>(value));
+}
+
+__attribute__((always_inline)) inline int16_t ClampedNegate(uint16_t value) {
+  uint16_t carry;
+  return __builtin_subcs(0, value, 0, &carry) + carry;
+}
+
+__attribute__((always_inline)) inline int16_t ClampedNegate(int16_t value) {
+  return ClampedNegate(static_cast<uint16_t>(value));
+}
+
+__attribute__((always_inline)) inline int32_t ClampedNegate(uint32_t value) {
+  uint32_t carry;
+  return __builtin_subc(0, value, 0, &carry) + carry;
+}
+
+__attribute__((always_inline)) inline int32_t ClampedNegate(int32_t value) {
+  return ClampedNegate(static_cast<uint32_t>(value));
+}
+
+// These are the LP64 platforms minus Mac (because Xcode blows up otherwise).
+#if !defined(__APPLE__) && defined(__LP64__) && __LP64__
+__attribute__((always_inline)) inline int64_t ClampedNegate(uint64_t value) {
+  uint64_t carry;
+  return __builtin_subcl(0, value, 0, &carry) + carry;
+}
+#else  // Mac, Windows, and any IL32 platforms.
+__attribute__((always_inline)) inline int64_t ClampedNegate(uint64_t value) {
+  uint64_t carry;
+  return __builtin_subcll(0, value, 0, &carry) + carry;
+}
+#endif
+__attribute__((always_inline)) inline int64_t ClampedNegate(int64_t value) {
+  return ClampedNegate(static_cast<uint64_t>(value));
+}
+
 template <typename T, typename U>
 struct ClampedSubFastOp {
   static const bool is_supported = true;
@@ -136,6 +180,17 @@ struct ClampedSubFastOp {
       return ClampedSubFastAsmOp<T, U>::template Do<V>(x, y);
     }
 
+    // Fast path for generic clamped negation.
+    if (std::is_same<T, U>::value && std::is_same<U, V>::value &&
+        IsCompileTimeConstant(x) && x == 0 && !IsCompileTimeConstant(y)) {
+      // We use IntegerForDigitsAndSign<> to convert the type to a uint*_t,
+      // otherwise Xcode can't resolve to the standard integral types correctly.
+      return ClampedNegate(
+          static_cast<typename IntegerForDigitsAndSign<
+              IntegerBitsPlusSign<T>::value, std::is_signed<T>::value>::type>(
+              y));
+    }
+
     V result;
     return !__builtin_sub_overflow(x, y, &result)
                ? result
......
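
The ClampedNegate overloads above lean on the Clang/GCC-style multiprecision carry builtins (__builtin_subcb, __builtin_subcs, __builtin_subc, __builtin_subcl, __builtin_subcll). Per Clang's documentation these compute x - y - carryin at the operand width and report the borrow through *carryout, and their usual job is chaining wide subtractions limb by limb. The sketch below only illustrates that chaining behavior; Sub16ViaLimbs is a hypothetical helper, not part of the commit.

#include <stdint.h>

// Subtract two 16-bit values using two 8-bit limbs, propagating the borrow
// from the low limb into the high limb through the builtin's carry argument.
static inline uint16_t Sub16ViaLimbs(uint16_t a, uint16_t b) {
  uint8_t borrow = 0;
  const uint8_t lo = __builtin_subcb(static_cast<uint8_t>(a),
                                     static_cast<uint8_t>(b), 0, &borrow);
  const uint8_t hi = __builtin_subcb(static_cast<uint8_t>(a >> 8),
                                     static_cast<uint8_t>(b >> 8), borrow,
                                     &borrow);
  return static_cast<uint16_t>(lo) | (static_cast<uint16_t>(hi) << 8);
}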
@@ -648,6 +648,26 @@ static void TestArithmetic(const char* dst, int line) {
     TEST_EXPECTED_VALUE(1, -ClampedNumeric<Dst>(-1));
     TEST_EXPECTED_VALUE(static_cast<Dst>(DstLimits::max() * -1),
                         -ClampedNumeric<Dst>(DstLimits::max()));
+
+    // The runtime paths for saturated negation differ significantly from what
+    // gets evaluated at compile-time. Making this test volatile forces the
+    // compiler to generate code rather than fold constant expressions.
+    volatile Dst value = Dst(0);
+    TEST_EXPECTED_VALUE(0, -MakeClampedNum(value));
+    value = Dst(1);
+    TEST_EXPECTED_VALUE(-1, -MakeClampedNum(value));
+    value = Dst(2);
+    TEST_EXPECTED_VALUE(-2, -MakeClampedNum(value));
+    value = Dst(-1);
+    TEST_EXPECTED_VALUE(1, -MakeClampedNum(value));
+    value = Dst(-2);
+    TEST_EXPECTED_VALUE(2, -MakeClampedNum(value));
+    value = DstLimits::max();
+    TEST_EXPECTED_VALUE(Dst(DstLimits::max() * -1), -MakeClampedNum(value));
+    value = Dst(-1 * DstLimits::max());
+    TEST_EXPECTED_VALUE(DstLimits::max(), -MakeClampedNum(value));
+    value = DstLimits::lowest();
+    TEST_EXPECTED_VALUE(DstLimits::max(), -MakeClampedNum(value));
   }
 
   // Generic absolute value.
......