Commit cdf4fd98 authored by Bartek Nowierski, committed by Commit Bot

[PA] Eagerly decommit single-slot slot spans

Empty slot spans are typically registered for lazy decommitting, to
avoid a perf regression when going back and forth between
decommitted<->active states. However, no perf difference has been
observed for single-slot slot spans, so it is better to decommit them
as soon as they become free.

Change-Id: I09e09055dd2b54b17a54ed35f518d72e9a16e310
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2549024
Auto-Submit: Bartek Nowierski <bartekn@chromium.org>
Reviewed-by: Benoit L <lizeb@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Commit-Queue: Bartek Nowierski <bartekn@chromium.org>
Cr-Commit-Position: refs/heads/master@{#831852}
parent b30e86f0
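Before the diff, a minimal sketch of the policy described above may help. The names here (SlotSpan, OnBecameEmpty, DecommitNow) and the POSIX madvise call are illustrative assumptions, not PartitionAlloc's actual implementation:

```cpp
#include <cstddef>
#include <sys/mman.h>

struct SlotSpan {
  void* base;
  size_t bytes;
  size_t slots_per_span;
  bool CanStoreRawSize() const { return slots_per_span == 1; }
};

void RegisterEmptySlotSpan(SlotSpan*) { /* queued for a later purge */ }

void DecommitNow(SlotSpan* s) {
  // MADV_DONTNEED drops the physical pages but keeps the virtual range
  // reserved: that is what "decommitted" means here.
  madvise(s->base, s->bytes, MADV_DONTNEED);
}

void OnBecameEmpty(SlotSpan* s) {
  if (s->CanStoreRawSize())
    DecommitNow(s);            // eager: no observed perf cost for one slot
  else
    RegisterEmptySlotSpan(s);  // lazy: reuse is likely, churn is costly
}

int main() {
  void* p = mmap(nullptr, 1 << 16, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return 1;
  SlotSpan span{p, 1 << 16, 1};  // single-slot span
  OnBecameEmpty(&span);          // decommitted immediately
  munmap(p, 1 << 16);
}
```

The trade-off: decommitting returns physical pages to the OS, but the next allocation from the same span must fault them back in. That is only cheap when reuse is rare, which is what makes single-slot spans a safe target.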
@@ -1796,7 +1796,7 @@ TEST_F(PartitionAllocTest, DumpMemoryStats) {
     allocator.root()->Free(ptr2);
   }
-  // This test checks large-but-not-quite-direct allocations.
+  // This test checks single-slot slot span allocations.
   {
     const size_t requested_size = 16 * SystemPageSize();
     void* ptr = allocator.root()->Alloc(requested_size + 1, type_name);
@@ -1842,12 +1842,12 @@ TEST_F(PartitionAllocTest, DumpMemoryStats) {
     EXPECT_FALSE(stats->is_direct_map);
     EXPECT_EQ(slot_size, stats->bucket_slot_size);
     EXPECT_EQ(0u, stats->active_bytes);
-    EXPECT_EQ(slot_size, stats->resident_bytes);
-    EXPECT_EQ(slot_size, stats->decommittable_bytes);
+    EXPECT_EQ(0u, stats->resident_bytes);
+    EXPECT_EQ(0u, stats->decommittable_bytes);
     EXPECT_EQ(0u, stats->num_full_slot_spans);
     EXPECT_EQ(0u, stats->num_active_slot_spans);
-    EXPECT_EQ(1u, stats->num_empty_slot_spans);
-    EXPECT_EQ(0u, stats->num_decommitted_slot_spans);
+    EXPECT_EQ(0u, stats->num_empty_slot_spans);
+    EXPECT_EQ(1u, stats->num_decommitted_slot_spans);
   }
   void* ptr2 = allocator.root()->Alloc(requested_size + SystemPageSize() + 1,
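The flipped expectations capture the accounting change: a freed single-slot span no longer lingers in the empty cache as resident, decommittable memory; it is counted as decommitted right away. A standalone model of that accounting (hypothetical struct and values, not the real PartitionAlloc stats API):

```cpp
#include <cstddef>
#include <cstdio>

struct BucketStats {
  size_t resident_bytes = 0;
  size_t decommittable_bytes = 0;
  size_t num_empty_slot_spans = 0;
  size_t num_decommitted_slot_spans = 0;
};

// Hypothetical accounting for one freed single-slot span of `slot_size`.
BucketStats StatsAfterFree(size_t slot_size, bool eager_decommit) {
  BucketStats s;
  if (eager_decommit) {
    s.num_decommitted_slot_spans = 1;   // pages already returned to the OS
  } else {
    s.resident_bytes = slot_size;       // still committed in the empty cache
    s.decommittable_bytes = slot_size;  // reclaimable on the next purge
    s.num_empty_slot_spans = 1;
  }
  return s;
}

int main() {
  BucketStats s = StatsAfterFree(/*slot_size=*/17 * 4096, /*eager=*/true);
  std::printf("resident=%zu empty=%zu decommitted=%zu\n", s.resident_bytes,
              s.num_empty_slot_spans, s.num_decommitted_slot_spans);
}
```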
@@ -2101,8 +2101,12 @@ TEST_F(PartitionAllocTest, PurgeDiscardableManyPages) {
   const size_t kFirstAllocPages = kHasLargePages ? 32 : 64;
   const size_t kSecondAllocPages = kHasLargePages ? 31 : 61;
+  if (kFirstAllocPages > MaxSystemPagesPerSlotSpan()) {
+    GTEST_SKIP() << "Immediate decommitting interferes with this test.";
+  }
   // Detect case (1) from above.
-  DCHECK_LT(kFirstAllocPages * SystemPageSize(), 1UL << kMaxBucketedOrder);
+  ASSERT_LT(kFirstAllocPages * SystemPageSize(), 1UL << kMaxBucketedOrder);
   const size_t kDeltaPages = kFirstAllocPages - kSecondAllocPages;
@@ -2128,7 +2132,7 @@ TEST_F(PartitionAllocTest, PurgeDiscardableManyPages) {
   EXPECT_EQ(kFirstAllocPages * SystemPageSize(), stats->resident_bytes);
   for (size_t i = 0; i < kFirstAllocPages; i++)
-    CHECK_PAGE_IN_CORE(p.PageAtIndex(i), true);
+    CHECK_PAGE_IN_CORE(p.PageAtIndex(i), false);
   allocator.root()->PurgeMemory(PartitionPurgeDiscardUnusedSystemPages);
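The residency check flips because pages of the eagerly decommitted span are no longer in core even before the purge. On Linux, a CHECK_PAGE_IN_CORE-style helper can be built on mincore(); this sketch (a hypothetical helper, not the test's actual macro) demonstrates the effect being asserted:

```cpp
#include <sys/mman.h>
#include <unistd.h>
#include <cassert>
#include <cstddef>

// Hypothetical CHECK_PAGE_IN_CORE-style helper: mincore() reports whether
// a page is resident in physical memory.
bool PageInCore(void* page) {
  unsigned char vec = 0;
  size_t page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  if (mincore(page, page_size, &vec) != 0)
    return false;  // treat errors as "not resident" in this sketch
  return vec & 1;
}

int main() {
  const size_t kPage = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  void* p = mmap(nullptr, kPage, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return 1;
  static_cast<char*>(p)[0] = 1;      // fault the page in
  assert(PageInCore(p));             // resident after the write
  madvise(p, kPage, MADV_DONTNEED);  // "decommit" the page
  assert(!PageInCore(p));            // no longer resident
  munmap(p, kPage);
}
```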
@@ -2570,10 +2574,12 @@ TEST_F(PartitionAllocTest, Bookkeeping) {
   EXPECT_EQ(expected_committed_size, root.total_size_of_committed_pages);
   EXPECT_EQ(expected_super_pages_size, root.total_size_of_super_pages);
-  // Freeing memory doesn't result in decommitting pages right away.
+  // Freeing memory for larger buckets decommits pages right away. The address
+  // space isn't released though.
   root.Free(ptr);
   root.Free(ptr2);
   root.Free(ptr3);
+  expected_committed_size -= 3 * bucket->get_bytes_per_span();
   EXPECT_EQ(expected_committed_size, root.total_size_of_committed_pages);
   EXPECT_EQ(expected_super_pages_size, root.total_size_of_super_pages);
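The added adjustment reflects that committed bytes now drop at Free() time rather than at the next purge. A worked example, under the assumption of a hypothetical 64 KiB per span (not the real value in the test):

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  const size_t kBytesPerSpan = 64 * 1024;         // assumed span size
  const size_t kSuperPageSize = 2 * 1024 * 1024;  // reserved address space
  size_t committed = 3 * kBytesPerSpan;           // after ptr, ptr2, ptr3

  // Eager decommit: the three Free() calls shrink the committed total right
  // away, mirroring the `expected_committed_size -=` line in the test...
  committed -= 3 * kBytesPerSpan;
  // ...while the reserved super-page total is untouched, because decommit
  // releases physical pages, not address space.
  std::printf("committed=%zu reserved=%zu\n", committed, kSuperPageSize);
}
```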
@@ -114,8 +114,8 @@ SlotSpanMetadata<thread_safe>::get_sentinel_slot_span() {
 template <bool thread_safe>
 DeferredUnmap SlotSpanMetadata<thread_safe>::FreeSlowPath() {
-#if DCHECK_IS_ON()
   auto* root = PartitionRoot<thread_safe>::FromSlotSpan(this);
+#if DCHECK_IS_ON()
   root->lock_.AssertAcquired();
 #endif
   PA_DCHECK(this != get_sentinel_slot_span());
@@ -130,10 +130,21 @@ DeferredUnmap SlotSpanMetadata<thread_safe>::FreeSlowPath() {
       bucket->SetNewActiveSlotSpan();
     PA_DCHECK(bucket->active_slot_spans_head != this);
-    if (CanStoreRawSize())
+    // Decommit the slot span, but not unconditionally. For certain slot sizes
+    // there could be enough churn to make slot spans transition between the
+    // decommitted<->active states often enough to cause a perf regression; in
+    // those cases it's better to register them for lazy decommitting.
+    // However, decommitting single-slot slot spans immediately has been
+    // empirically confirmed not to cause a regression. Future optimizations
+    // may base the decision on slot size instead (see crrev.com/c/2549024 for
+    // more details).
+    if (CanStoreRawSize()) {
       SetRawSize(0);
-    PartitionRegisterEmptySlotSpan(this);
+      Decommit(root);
+    } else {
+      PartitionRegisterEmptySlotSpan(this);
+    }
   } else {
     PA_DCHECK(!bucket->is_direct_mapped());
     // Ensure that the slot span is full. That's the only valid case if we
@@ -165,6 +176,7 @@ template <bool thread_safe>
 void SlotSpanMetadata<thread_safe>::Decommit(PartitionRoot<thread_safe>* root) {
   root->lock_.AssertAcquired();
   PA_DCHECK(is_empty());
+  PA_DCHECK(empty_cache_index == -1);
   PA_DCHECK(!bucket->is_direct_mapped());
   void* addr = SlotSpanMetadata::ToPointer(this);
   root->DecommitSystemPagesForData(addr, bucket->get_bytes_per_span(),
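The new PA_DCHECK pins down an invariant: a span must already be off the empty-span cache (empty_cache_index == -1) when it is decommitted, so an eager Decommit() cannot overlap with a later lazy purge of the same span. A toy model of that invariant (hypothetical names, not PartitionAlloc code):

```cpp
#include <cassert>

// Toy model of the invariant behind the added PA_DCHECK: -1 means "not
// registered for lazy decommit", so an eager Decommit() can never apply to
// a span that a purge of the empty-span cache might still visit.
struct SlotSpanModel {
  int empty_cache_index = -1;  // -1: not in the lazy-decommit cache
  bool committed = true;

  void RegisterEmpty(int index) { empty_cache_index = index; }

  void Decommit() {
    assert(empty_cache_index == -1);  // mirrors the new PA_DCHECK
    assert(committed);
    committed = false;
  }
};

int main() {
  SlotSpanModel span;
  span.Decommit();  // fine: never registered for lazy decommitting
}
```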