Commit 79b4e301 authored by Benoit Lize, committed by Chromium LUCI CQ

[PartitionAlloc] Don't perform expensive allocations to fill the thread cache.

Bug: 998048
Change-Id: Ie18db70f78188e4541aa4d5a0f44da2ce166524e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2569664
Commit-Queue: Benoit L <lizeb@chromium.org>
Reviewed-by: Bartek Nowierski <bartekn@chromium.org>
Cr-Commit-Position: refs/heads/master@{#833771}
parent 13ccd642
@@ -274,8 +274,13 @@ enum PartitionAllocFlags {
   PartitionAllocReturnNull = 1 << 0,
   PartitionAllocZeroFill = 1 << 1,
   PartitionAllocNoHooks = 1 << 2,  // Internal only.
+  // If the allocation requires a "slow path" (such as allocating/committing a
+  // new slot span), return nullptr instead. Note this makes all large
+  // allocations return nullptr, such as direct-mapped ones, and even for
+  // smaller ones, a nullptr value is common.
+  PartitionAllocFastPathOrReturnNull = 1 << 3,  // Internal only.
-  PartitionAllocLastFlag = PartitionAllocNoHooks
+  PartitionAllocLastFlag = PartitionAllocFastPathOrReturnNull
 };

 }  // namespace base
...
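As an aside, a minimal sketch of how the new flag is meant to be used (modeled on the FastPathOrReturnNull unit test below, and assuming a test-fixture-style |allocator| whose root() exposes AllocFlagsNoHooks(), as in the Chromium tree at this revision): try a fast-path-only allocation first, and fall back to the unrestricted path on nullptr.

    // Sketch only, not part of this commit. |allocator| is assumed to be a
    // PartitionAllocator as in the unit test below.
    void* AllocPreferFastPath(size_t size) {
      // First attempt: return nullptr instead of activating a new slot span,
      // recommitting a decommitted one, or taking the direct-map path.
      void* ptr = allocator.root()->AllocFlagsNoHooks(
          PartitionAllocFastPathOrReturnNull, size);
      if (ptr)
        return ptr;
      // Fall back to the unrestricted path, which may do the expensive work.
      return allocator.root()->AllocFlagsNoHooks(0, size);
    }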
@@ -2746,6 +2746,41 @@ TEST_F(PartitionAllocTest, RefCountBasic) {
 #endif

+TEST_F(PartitionAllocTest, FastPathOrReturnNull) {
+  size_t allocation_size = 64;
+  // The very first allocation is never a fast path one, since it needs a new
+  // super page and a new partition page.
+  EXPECT_FALSE(allocator.root()->AllocFlagsNoHooks(
+      PartitionAllocFastPathOrReturnNull, allocation_size));
+  void* ptr = allocator.root()->AllocFlagsNoHooks(0, allocation_size);
+  ASSERT_TRUE(ptr);
+
+  // Next one is, since the partition page has been activated.
+  void* ptr2 = allocator.root()->AllocFlagsNoHooks(
+      PartitionAllocFastPathOrReturnNull, allocation_size);
+  EXPECT_TRUE(ptr2);
+
+  // First allocation of a different bucket is slow.
+  EXPECT_FALSE(allocator.root()->AllocFlagsNoHooks(
+      PartitionAllocFastPathOrReturnNull, 2 * allocation_size));
+
+  size_t allocated_size = 2 * allocation_size;
+  std::vector<void*> ptrs;
+  while (void* new_ptr = allocator.root()->AllocFlagsNoHooks(
+             PartitionAllocFastPathOrReturnNull, allocation_size)) {
+    ptrs.push_back(new_ptr);
+    allocated_size += allocation_size;
+  }
+  EXPECT_LE(allocated_size,
+            PartitionPageSize() * kMaxPartitionPagesPerSlotSpan);
+
+  for (void* ptr_to_free : ptrs)
+    allocator.root()->FreeNoHooks(ptr_to_free);
+  allocator.root()->FreeNoHooks(ptr);
+  allocator.root()->FreeNoHooks(ptr2);
+}
+
 }  // namespace internal
 }  // namespace base
...
@@ -511,6 +511,11 @@ void* PartitionBucket<thread_safe>::SlowPathAlloc(
     PA_DCHECK(this == &root->sentinel_bucket);
     PA_DCHECK(active_slot_spans_head ==
               SlotSpanMetadata<thread_safe>::get_sentinel_slot_span());
+
+    // No fast path for direct-mapped allocations.
+    if (flags & PartitionAllocFastPathOrReturnNull)
+      return nullptr;
+
     if (raw_size > MaxDirectMapped()) {
       if (return_null)
         return nullptr;
@@ -568,6 +573,10 @@ void* PartitionBucket<thread_safe>::SlowPathAlloc(
   }
   if (UNLIKELY(!new_slot_span) &&
       LIKELY(decommitted_slot_spans_head != nullptr)) {
+    // Commit can be expensive, don't do it.
+    if (flags & PartitionAllocFastPathOrReturnNull)
+      return nullptr;
+
     new_slot_span = decommitted_slot_spans_head;
     PA_DCHECK(new_slot_span->bucket == this);
     PA_DCHECK(new_slot_span->is_decommitted());
@@ -585,6 +594,10 @@ void* PartitionBucket<thread_safe>::SlowPathAlloc(
     }
     PA_DCHECK(new_slot_span);
   } else {
+    // Getting a new slot span is expensive, don't do it.
+    if (flags & PartitionAllocFastPathOrReturnNull)
+      return nullptr;
+
     // Third. If we get here, we need a brand new slot span.
     // TODO(bartekn): For single-slot slot spans, we can use rounded raw_size
     // as slot_span_committed_size.
...
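Taken together, the three hunks above make every branch of SlowPathAlloc() that would map or commit memory bail out early when the flag is set. A self-contained toy model of that decision ladder (simplified names and types, not the actual Chromium code):

    // Toy model only: which SlowPathAlloc() branches the new flag
    // short-circuits.
    enum ToyFlags { kFastPathOrReturnNull = 1 << 3 };

    enum class SlowPathStep {
      kDirectMap,         // sentinel bucket / direct-mapped allocation
      kRecommitSlotSpan,  // reuse a decommitted slot span (requires a commit)
      kNewSlotSpan,       // provision a brand new slot span
      kCheapReuse,        // pop a slot from an already-committed span
    };

    void* SlowPathAllocModel(int flags, SlowPathStep step) {
      switch (step) {
        case SlowPathStep::kDirectMap:
        case SlowPathStep::kRecommitSlotSpan:
        case SlowPathStep::kNewSlotSpan:
          // All three map or commit memory, so they count as "expensive".
          if (flags & kFastPathOrReturnNull)
            return nullptr;
          break;
        case SlowPathStep::kCheapReuse:
          break;  // Cheap enough to proceed even with the flag set.
      }
      static char slot;  // stand-in for a real slot
      return &slot;
    }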
@@ -298,18 +298,25 @@ void ThreadCache::FillBucket(size_t bucket_index) {
   // Same as calling RawAlloc() |count| times, but acquires the lock only once.
   internal::ScopedGuard<internal::ThreadSafe> guard(root_->lock_);
   for (int i = 0; i < count; i++) {
-    // We allow the allocator to return nullptr, since filling the cache may
-    // safely fail, and the proper flag will be handled by the central
-    // allocator.
+    // Thread cache fill should not trigger expensive operations, to not grab
+    // the lock for a long time needlessly, but also to not inflate memory
+    // usage. Indeed, without PartitionAllocFastPathOrReturnNull, cache fill
+    // may activate a new PartitionPage, or even a new SuperPage, which is
+    // clearly not desirable.
     //
     // |raw_size| is set to the slot size, as we don't know it. However, it is
     // only used for direct-mapped allocations and single-slot ones anyway,
     // which are not handled here.
     void* ptr = root_->AllocFromBucket(
-        &root_->buckets[bucket_index], PartitionAllocReturnNull,
+        &root_->buckets[bucket_index], PartitionAllocFastPathOrReturnNull,
         root_->buckets[bucket_index].slot_size /* raw_size */,
         &utilized_slot_size, &is_already_zeroed);
-    // Central allocator is out of memory.
+    // Either the previous allocation would require a slow path allocation, or
+    // the central allocator is out of memory. If the bucket was filled with
+    // some objects, then the allocation will be handled normally. Otherwise,
+    // this goes to the central allocator, which will service the allocation,
+    // return nullptr or crash.
     if (!ptr)
       break;
...
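The rewritten comments describe a fill-then-fall-back contract: filling stops at the first nullptr, and a later allocation is served either from whatever did get cached or by the central allocator. A simplified model of that behavior (toy code, not the actual ThreadCache):

    #include <vector>

    // Toy model only; Chromium's real ThreadCache stores freelist entries,
    // not a std::vector, and takes the partition lock once per fill.
    struct ToyThreadCache {
      std::vector<void*> bucket;

      // Fill with fast-path-only allocations; stopping early is fine.
      template <typename FastAlloc>
      void FillBucket(int count, FastAlloc fast_alloc) {
        for (int i = 0; i < count; i++) {
          void* ptr = fast_alloc();  // nullptr if a slow path would be needed
          if (!ptr)
            break;  // Stop filling rather than committing new memory.
          bucket.push_back(ptr);
        }
      }

      // Serve from the (possibly partially filled) bucket, else fall through
      // to the central allocator, which may take slow paths, return nullptr,
      // or crash, depending on its own flags.
      template <typename CentralAlloc>
      void* Alloc(CentralAlloc central_alloc) {
        if (!bucket.empty()) {
          void* ptr = bucket.back();
          bucket.pop_back();
          return ptr;
        }
        return central_alloc();
      }
    };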
@@ -100,6 +100,13 @@ class ThreadCacheTest : public ::testing::Test {
     auto* tcache = g_root->thread_cache_for_testing();
     ASSERT_TRUE(tcache);
+
+    // Make sure that enough slot spans have been touched, otherwise cache fill
+    // becomes unpredictable (because it doesn't take slow paths in the
+    // allocator), which is an issue for tests.
+    FillThreadCacheAndReturnIndex(kSmallSize, 1000);
+    FillThreadCacheAndReturnIndex(kMediumSize, 1000);
+
     tcache->Purge();
   }
...