Commit d7f053aa authored by Benoit Lize, committed by Commit Bot

[PartitionAlloc] Extend SpinningFutex to Windows.

SpinningFutex is essentially an OS-provided lock with spinning in
userspace before sleeping. On Linux it is implemented using futex(),
which is not available on Windows. However, SRWLocks provide
TryAcquireSRWLockExclusive(), which is functionally equivalent to the
userspace CAS used with futex().
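
To illustrate the claimed equivalence, here is a minimal sketch (not the
code in this CL; function names are illustrative):

  #include <atomic>
  #include <cstdint>
  #if defined(_WIN32)
  #include <windows.h>
  #endif

  // Linux-style fast path: uncontended acquisition is one userspace CAS on
  // the futex word; the kernel is entered only on contention.
  bool TryLockFutexStyle(std::atomic<int32_t>& state) {
    int32_t expected = 0;  // kUnlocked
    return state.compare_exchange_strong(expected, 1 /* kLockedUncontended */,
                                         std::memory_order_acquire,
                                         std::memory_order_relaxed);
  }

  #if defined(_WIN32)
  // Windows: TryAcquireSRWLockExclusive() plays the same role, acquiring the
  // lock without blocking iff it is currently free.
  bool TryLockSRWStyle(SRWLOCK& lock) {
    return !!::TryAcquireSRWLockExclusive(&lock);
  }
  #endif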

This CL:
- Renames SpinningFutex to SpinningMutex
- Extracts the common code
- Extends it to Windows by using SRWLock instead of futex()

The fast path on Windows is likely a bit slower than on Linux, as there
is still an external function call, but performance should be
comparable.
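
Roughly, both backends share the same spin-then-sleep acquire loop. A
sketch of the pattern (member names as in the CL; kMaxBackoff and LIKELY
are taken from base but assumed here, and the exact loop body is elided
in the hunk below):

  void SpinningMutex::Acquire() {
    int tries = 0;
    int backoff = 1;
    do {
      if (LIKELY(Try()))
        return;
      // Spin in userspace: cheap pause instructions (YIELD_PROCESSOR) with
      // exponential backoff, staying out of the kernel while spinning.
      for (int yields = 0; yields < backoff; yields++) {
        YIELD_PROCESSOR;
        tries++;
      }
      constexpr int kMaxBackoff = 16;  // Assumed cap, not from the hunk.
      backoff = std::min(kMaxBackoff, backoff << 1);
    } while (tries < kSpinCount);
    // Out of patience: sleep in the kernel. futex(FUTEX_WAIT) on Linux,
    // AcquireSRWLockExclusive() on Windows.
    LockSlow();
  }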

Bug: 1061437
Change-Id: I7d71055865568738a569d7252f38a7a6773795e0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2467916
Commit-Queue: Benoit L <lizeb@chromium.org>
Reviewed-by: Bartek Nowierski <bartekn@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Cr-Commit-Position: refs/heads/master@{#816987}
parent e97e74fa
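
The diff below elides the body of SpinningMutex::FutexWait(). For
context, a rough sketch of the wait side of the two-state futex protocol
(a hypothetical standalone version, error checking omitted):

  #include <atomic>
  #include <cerrno>
  #include <cstdint>
  #include <linux/futex.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  // Hypothetical free-function version of the wait side.
  void FutexWaitSketch(std::atomic<int32_t>& state, int32_t locked_contended) {
    int saved_errno = errno;  // The syscall may clobber errno; preserve it.
    // Sleeps only if |state| still holds |locked_contended|; otherwise
    // FUTEX_WAIT returns immediately with EAGAIN and the caller's loop
    // retries the locked-contended exchange.
    syscall(SYS_futex, &state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
            locked_contended, nullptr, nullptr, 0);
    errno = saved_errno;
  }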
......@@ -1794,6 +1794,8 @@ component("base") {
"allocator/partition_allocator/pcscan.h",
"allocator/partition_allocator/random.cc",
"allocator/partition_allocator/random.h",
"allocator/partition_allocator/spinning_mutex.cc",
"allocator/partition_allocator/spinning_mutex.h",
"allocator/partition_allocator/thread_cache.cc",
"allocator/partition_allocator/thread_cache.h",
"allocator/partition_allocator/yield_processor.h",
......@@ -1811,13 +1813,6 @@ component("base") {
"allocator/partition_allocator/page_allocator_internals_fuchsia.h",
]
}
if (is_linux || is_chromeos || is_android) {
sources += [
"allocator/partition_allocator/spinning_futex_linux.cc",
"allocator/partition_allocator/spinning_futex_linux.h",
]
}
}
}
......
......@@ -7,19 +7,11 @@
#include "base/allocator/partition_allocator/yield_processor.h"
#include "base/threading/platform_thread.h"
#if defined(OS_WIN)
#include <windows.h>
#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
#if !defined(PA_HAS_SPINNING_MUTEX)
#if defined(OS_POSIX) || defined(OS_FUCHSIA)
#include <sched.h>
#endif
// The YIELD_THREAD macro tells the OS to relinquish our quantum. This is
// basically a worst-case fallback, and if you're hitting it with any frequency
// you really should be using a proper lock (such as |base::Lock|) rather than
// these spinlocks.
#if defined(OS_WIN)
#define YIELD_THREAD SwitchToThread()
#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
#define YIELD_THREAD sched_yield()
#else // Other OS
......@@ -27,7 +19,7 @@
#warning "Thread yield not supported on this OS."
#define YIELD_THREAD ((void)0)
#endif // OS_WIN
#endif // defined(OS_POSIX) || defined(OS_FUCHSIA)
namespace base {
namespace internal {
......@@ -66,3 +58,5 @@ void SpinLock::AcquireSlow() {
} // namespace internal
} // namespace base
#endif // !defined(PA_HAS_SPINNING_MUTEX)
......@@ -9,15 +9,11 @@
#include <type_traits>
#include "base/allocator/buildflags.h"
#include "base/no_destructor.h"
#include "base/allocator/partition_allocator/spinning_mutex.h"
#include "base/thread_annotations.h"
#include "base/threading/platform_thread.h"
#include "build/build_config.h"
#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID)
#include "base/allocator/partition_allocator/spinning_futex_linux.h"
#endif
namespace base {
namespace internal {
......@@ -57,6 +53,7 @@ class SCOPED_LOCKABLE ScopedUnlockGuard {
MaybeSpinLock<thread_safe>& lock_;
};
#if !defined(PA_HAS_SPINNING_MUTEX)
// Spinlock. Do not use, to be removed. crbug.com/1061437.
class BASE_EXPORT SpinLock {
public:
......@@ -89,6 +86,7 @@ class BASE_EXPORT SpinLock {
std::atomic_int lock_{0};
};
#endif // !defined(PA_HAS_SPINNING_MUTEX)
template <>
class LOCKABLE MaybeSpinLock<true> {
......@@ -104,7 +102,7 @@ class LOCKABLE MaybeSpinLock<true> {
//
// To avoid that, crash quickly when the code becomes reentrant.
PlatformThreadRef current_thread = PlatformThread::CurrentRef();
if (!lock_->Try()) {
if (!lock_.Try()) {
// The lock wasn't free when we tried to acquire it. This can be because
// another thread or *this* thread was holding it.
//
......@@ -123,11 +121,11 @@ class LOCKABLE MaybeSpinLock<true> {
// issue.
IMMEDIATE_CRASH();
}
lock_->Acquire();
lock_.Acquire();
}
owning_thread_ref_.store(current_thread, std::memory_order_relaxed);
#else
lock_->Acquire();
lock_.Acquire();
#endif
}
......@@ -135,28 +133,28 @@ class LOCKABLE MaybeSpinLock<true> {
#if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC) && DCHECK_IS_ON()
owning_thread_ref_.store(PlatformThreadRef(), std::memory_order_relaxed);
#endif
lock_->Release();
lock_.Release();
}
void AssertAcquired() const ASSERT_EXCLUSIVE_LOCK() {
lock_->AssertAcquired();
lock_.AssertAcquired();
}
private:
#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID)
base::NoDestructor<SpinningFutex> lock_;
#if defined(PA_HAS_SPINNING_MUTEX)
SpinningMutex lock_;
#else
// base::Lock is slower on the fast path than SpinLock, hence we still use it
// on non-DCHECK() builds. crbug.com/1125999
base::NoDestructor<SpinLock> lock_;
// base::NoDestructor is here to use the same code elsewhere, we are not
// leaking anything.
static_assert(std::is_trivially_destructible<SpinLock>::value, "");
#endif
// base::Lock is slower on the fast path than SpinLock, hence we still use
// SpinLock. crbug.com/1125999
SpinLock lock_;
#endif // defined(PA_HAS_SPINNING_MUTEX)
#if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC) && DCHECK_IS_ON()
std::atomic<PlatformThreadRef> owning_thread_ref_ GUARDED_BY(lock_);
#endif
};
// We want PartitionRoot to not have a global destructor, so this should not
// have one.
static_assert(std::is_trivially_destructible<MaybeSpinLock<true>>::value, "");
template <>
class LOCKABLE MaybeSpinLock<false> {
......@@ -170,7 +168,7 @@ class LOCKABLE MaybeSpinLock<false> {
static_assert(
sizeof(MaybeSpinLock<true>) == sizeof(MaybeSpinLock<false>),
"Sizes should be equal to enseure identical layout of PartitionRoot");
"Sizes should be equal to ensure identical layout of PartitionRoot");
} // namespace internal
} // namespace base
......
......@@ -2,22 +2,25 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/allocator/partition_allocator/spinning_futex_linux.h"
#include "base/allocator/partition_allocator/spinning_mutex.h"
#include "base/allocator/partition_allocator/partition_alloc_check.h"
#include "build/build_config.h"
#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID)
#if defined(PA_HAS_SPINNING_MUTEX)
#if defined(PA_HAS_LINUX_KERNEL)
#include <errno.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#endif // defined(PA_HAS_LINUX_KERNEL)
namespace base {
namespace internal {
#if defined(PA_HAS_LINUX_KERNEL)
void SpinningFutex::FutexWait() {
void SpinningMutex::FutexWait() {
// Save and restore errno.
int saved_errno = errno;
// Don't check the return value, as we will not be awakened by a timeout, since
......@@ -50,7 +53,7 @@ void SpinningFutex::FutexWait() {
errno = saved_errno;
}
void SpinningFutex::FutexWake() {
void SpinningMutex::FutexWake() {
int saved_errno = errno;
long retval = syscall(SYS_futex, &state_, FUTEX_WAKE | FUTEX_PRIVATE_FLAG,
1 /* wake up a single waiter */, nullptr, nullptr, 0);
......@@ -58,7 +61,7 @@ void SpinningFutex::FutexWake() {
errno = saved_errno;
}
void SpinningFutex::LockSlow() {
void SpinningMutex::LockSlow() {
// If this thread gets woken up but another one got the lock first, then go
// back to sleeping. See comments in |FutexWait()| to see why a loop is
// required.
while (state_.exchange(kLockedContended, std::memory_order_acquire) !=
......@@ -67,7 +70,14 @@ void SpinningFutex::LockSlow() {
}
}
#else
void SpinningMutex::LockSlow() {
::AcquireSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}
#endif
} // namespace internal
} // namespace base
#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID)
#endif // defined(PA_HAS_SPINNING_MUTEX)
......@@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_FUTEX_LINUX_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_FUTEX_LINUX_H_
#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_MUTEX_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_MUTEX_H_
#include <algorithm>
#include <atomic>
......@@ -11,17 +11,27 @@
#include "base/allocator/partition_allocator/yield_processor.h"
#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/thread_annotations.h"
#include "build/build_config.h"
#if !(defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID))
#error "Not supported"
#if defined(OS_WIN)
#include <windows.h>
#endif
#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID)
#define PA_HAS_LINUX_KERNEL
#endif
#if defined(PA_HAS_LINUX_KERNEL) || defined(OS_WIN)
#define PA_HAS_SPINNING_MUTEX
#endif
#if defined(PA_HAS_SPINNING_MUTEX)
namespace base {
namespace internal {
// Simple spinning futex lock. It will spin in user space a set number of times
// before going into the kernel to sleep.
// Simple spinning lock. It will spin in user space a set number of times before
// going into the kernel to sleep.
//
// This is intended to give "the best of both worlds" between a SpinLock and
// base::Lock:
......@@ -33,23 +43,30 @@ namespace internal {
// We don't rely on base::Lock, which we could make spin (by calling Try() in a
// loop), as its performance is below that of a custom spinlock, as seen in
// high-level benchmarks. Instead this implements a simple non-recursive mutex
// on top of
// the futex() syscall. The main difference between this and a libc
// implementation is that it only supports the simplest path: private (to a
// process), non-recursive mutexes with no priority inheritance, no timed waits.
// the futex() syscall on Linux, and SRWLock on Windows. The main difference
// between this and a libc implementation is that it only supports the simplest
// path: private (to a process), non-recursive mutexes with no priority
// inheritance, no timed waits.
//
// As a side-effect that is useful for the allocator, this code does not make
// any allocations; locks are small, with a constexpr constructor and no
// destructor.
class BASE_EXPORT SpinningFutex {
class LOCKABLE BASE_EXPORT SpinningMutex {
public:
inline constexpr SpinningFutex();
ALWAYS_INLINE void Acquire();
ALWAYS_INLINE void Release();
ALWAYS_INLINE bool Try();
inline constexpr SpinningMutex();
ALWAYS_INLINE void Acquire() EXCLUSIVE_LOCK_FUNCTION();
ALWAYS_INLINE void Release() UNLOCK_FUNCTION();
ALWAYS_INLINE bool Try() EXCLUSIVE_TRYLOCK_FUNCTION(true);
void AssertAcquired() const {} // Not supported.
private:
void LockSlow();
// Same as SpinLock, not scientifically calibrated. Consider lowering later,
// as the slow path has better characteristics than SpinLock's.
static constexpr int kSpinCount = 1000;
#if defined(PA_HAS_LINUX_KERNEL)
void FutexWait();
void FutexWake();
......@@ -57,14 +74,13 @@ class BASE_EXPORT SpinningFutex {
static constexpr int kLockedUncontended = 1;
static constexpr int kLockedContended = 2;
// Same as SpinLock, not scientifically calibrated. Consider lowering later,
// as the slow path has better characteristics than SpinLock's.
static constexpr int kSpinCount = 1000;
std::atomic<int32_t> state_{kUnlocked};
#else
SRWLOCK lock_ = SRWLOCK_INIT;
#endif
};
ALWAYS_INLINE void SpinningFutex::Acquire() {
ALWAYS_INLINE void SpinningMutex::Acquire() {
int tries = 0;
int backoff = 1;
// Busy-waiting is inlined, which is fine as long as we have few callers. This
......@@ -93,7 +109,11 @@ ALWAYS_INLINE void SpinningFutex::Acquire() {
LockSlow();
}
ALWAYS_INLINE bool SpinningFutex::Try() {
inline constexpr SpinningMutex::SpinningMutex() = default;
#if defined(PA_HAS_LINUX_KERNEL)
ALWAYS_INLINE bool SpinningMutex::Try() {
int expected = kUnlocked;
return (state_.load(std::memory_order_relaxed) == expected) &&
state_.compare_exchange_strong(expected, kLockedUncontended,
......@@ -101,9 +121,7 @@ ALWAYS_INLINE bool SpinningFutex::Try() {
std::memory_order_relaxed);
}
inline constexpr SpinningFutex::SpinningFutex() = default;
ALWAYS_INLINE void SpinningFutex::Release() {
ALWAYS_INLINE void SpinningMutex::Release() {
if (UNLIKELY(state_.exchange(kUnlocked, std::memory_order_release) ==
kLockedContended)) {
// |kLockedContended|: there is a waiter to wake up.
......@@ -123,6 +141,20 @@ ALWAYS_INLINE void SpinningFutex::Release() {
}
}
#else
ALWAYS_INLINE bool SpinningMutex::Try() {
return !!::TryAcquireSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}
ALWAYS_INLINE void SpinningMutex::Release() {
::ReleaseSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}
#endif
} // namespace internal
} // namespace base
#endif // BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_FUTEX_LINUX_H_
#endif // defined(PA_HAS_SPINNING_MUTEX)
#endif // BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_MUTEX_H_
......@@ -34,7 +34,6 @@
#endif // ARCH
#ifndef YIELD_PROCESSOR
#warning "Processor yield not supported on this architecture."
#define YIELD_PROCESSOR ((void)0)
#endif
......