Commit 5470662a authored by Benoit Lize, committed by Commit Bot

[PartitionAlloc] Record thread cache statistics.

This CL adds accounting for the thread cache, and surfaces the data in
about:tracing when the thread cache is used for the FastMalloc()
partition. The recorded data shows that for FastMalloc(), the current
settings catch the vast majority of allocations, at a small memory cost,
and that most allocations are on the main threads, as expected for
FastMalloc().

Bug: 998048
Change-Id: Iaa55deb12f0c04dd7f301e15c7497b817f4843e0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2424309
Commit-Queue: Benoit L <lizeb@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Cr-Commit-Position: refs/heads/master@{#809774}
parent b56b05e8
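For orientation, here is a minimal sketch of how a caller might consume the statistics APIs this CL introduces. It is an illustration under stated assumptions, not code from the CL: the header path and LogThreadCacheEffectiveness() are hypothetical, while ThreadCacheRegistry, DumpStats() and the ThreadCacheStats fields match the declarations in the diff below.

// Hypothetical consumer of the new statistics (sketch only).
#include <cstdint>

#include "base/allocator/partition_allocator/thread_cache.h"  // Assumed path.

void LogThreadCacheEffectiveness() {
  base::internal::ThreadCacheStats stats = {};
  // false: aggregate over every registered thread cache; true would report
  // only the calling thread's cache.
  base::internal::ThreadCacheRegistry::Instance().DumpStats(false, &stats);
  if (stats.alloc_count == 0)
    return;
  // Fraction of allocation requests served from the per-thread fast path.
  double hit_rate = static_cast<double>(stats.alloc_hits) /
                    static_cast<double>(stats.alloc_count);
  // Memory cost of caching: cached slots plus per-cache metadata.
  uint64_t memory_cost = stats.bucket_total_memory + stats.metadata_overhead;
  (void)hit_rate;
  (void)memory_cost;
}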
@@ -845,6 +845,15 @@ void PartitionRoot<thread_safe>::DumpStats(const char* partition_name,
  stats.total_resident_bytes += direct_mapped_allocations_total_size;
  stats.total_active_bytes += direct_mapped_allocations_total_size;
stats.has_thread_cache = !is_light_dump && with_thread_cache;
if (stats.has_thread_cache) {
internal::ThreadCacheRegistry::Instance().DumpStats(
true, &stats.current_thread_cache_stats);
internal::ThreadCacheRegistry::Instance().DumpStats(
false, &stats.all_thread_caches_stats);
}
  dumper->PartitionDumpTotals(partition_name, &stats);
}
...
@@ -240,6 +240,10 @@ struct PartitionMemoryStats {
  size_t total_active_bytes;         // Total active bytes in the partition.
  size_t total_decommittable_bytes;  // Total bytes that could be decommitted.
  size_t total_discardable_bytes;    // Total bytes that could be discarded.
bool has_thread_cache;
internal::ThreadCacheStats current_thread_cache_stats;
internal::ThreadCacheStats all_thread_caches_stats;
};

// Struct used to retrieve memory statistics about a partition bucket. Used by
...

@@ -28,6 +28,59 @@ static std::atomic<bool> g_has_instance;

}  // namespace
// static
ThreadCacheRegistry& ThreadCacheRegistry::Instance() {
static NoDestructor<ThreadCacheRegistry> instance;
return *instance.get();
}
ThreadCacheRegistry::ThreadCacheRegistry() = default;
void ThreadCacheRegistry::RegisterThreadCache(ThreadCache* cache) {
AutoLock scoped_locker(GetLock());
cache->next_ = nullptr;
cache->prev_ = nullptr;
ThreadCache* previous_head = list_head_;
list_head_ = cache;
cache->next_ = previous_head;
if (previous_head)
previous_head->prev_ = cache;
}
void ThreadCacheRegistry::UnregisterThreadCache(ThreadCache* cache) {
AutoLock scoped_locker(GetLock());
if (cache->prev_)
cache->prev_->next_ = cache->next_;
if (cache->next_)
cache->next_->prev_ = cache->prev_;
if (cache == list_head_)
list_head_ = cache->next_;
}
void ThreadCacheRegistry::DumpStats(bool my_thread_only,
ThreadCacheStats* stats) {
memset(reinterpret_cast<void*>(stats), 0, sizeof(ThreadCacheStats));
AutoLock scoped_locker(GetLock());
if (my_thread_only) {
auto* tcache = ThreadCache::Get();
if (!tcache)
return;
tcache->AccumulateStats(stats);
} else {
ThreadCache* tcache = list_head_;
while (tcache) {
// Racy, as other threads are still allocating. This is not an issue,
// since we are only interested in statistics. However, this means that
// count is not necessarily equal to hits + misses for the various types
// of events.
tcache->AccumulateStats(stats);
tcache = tcache->next_;
}
}
}
// static
void ThreadCache::Init(PartitionRoot<ThreadSafe>* root) {
  bool ok = PartitionTlsCreate(&g_thread_cache_key, DeleteThreadCache);
@@ -62,7 +115,7 @@ ThreadCache* ThreadCache::Create(PartitionRoot<internal::ThreadSafe>* root) {
  void* buffer =
      root->RawAlloc(bucket, PartitionAllocZeroFill, sizeof(ThreadCache),
                     &allocated_size, &already_zeroed);
  ThreadCache* tcache = new (buffer) ThreadCache(root);
  // This may allocate.
  PartitionTlsSet(g_thread_cache_key, tcache);
@@ -70,10 +123,37 @@ ThreadCache* ThreadCache::Create(PartitionRoot<internal::ThreadSafe>* root) {
  return tcache;
}
ThreadCache::ThreadCache(PartitionRoot<ThreadSafe>* root)
: buckets_(), stats_(), root_(root), next_(nullptr), prev_(nullptr) {
ThreadCacheRegistry::Instance().RegisterThreadCache(this);
}
ThreadCache::~ThreadCache() {
ThreadCacheRegistry::Instance().UnregisterThreadCache(this);
  Purge();
}
void ThreadCache::AccumulateStats(ThreadCacheStats* stats) const {
stats->alloc_count += stats_.alloc_count;
stats->alloc_hits += stats_.alloc_hits;
stats->alloc_misses += stats_.alloc_misses;
stats->alloc_miss_empty += stats_.alloc_miss_empty;
stats->alloc_miss_too_large += stats_.alloc_miss_too_large;
stats->cache_fill_count += stats_.cache_fill_count;
stats->cache_fill_hits += stats_.cache_fill_hits;
stats->cache_fill_misses += stats_.cache_fill_misses;
stats->cache_fill_bucket_full += stats_.cache_fill_bucket_full;
stats->cache_fill_too_large += stats_.cache_fill_too_large;
for (size_t i = 0; i < kBucketCount; i++) {
stats->bucket_total_memory +=
buckets_[i].count * root_->buckets[i].slot_size;
}
stats->metadata_overhead += sizeof(*this);
}
void ThreadCache::Purge() {
  for (Bucket& bucket : buckets_) {
    size_t count = bucket.count;
...
@@ -14,7 +14,9 @@
#include "base/allocator/partition_allocator/partition_tls.h"
#include "base/base_export.h"
#include "base/gtest_prod_util.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h" #include "base/partition_alloc_buildflags.h"
#include "base/synchronization/lock.h"
namespace base {

@@ -24,6 +26,68 @@ class ThreadCache;

extern BASE_EXPORT PartitionTlsKey g_thread_cache_key;
// Most of these are not populated if PA_ENABLE_THREAD_CACHE_STATISTICS is not
// defined.
struct ThreadCacheStats {
uint64_t alloc_count; // Total allocation requests.
uint64_t alloc_hits; // Thread cache hits.
uint64_t alloc_misses; // Thread cache misses.
// Allocation failure details:
uint64_t alloc_miss_empty;
uint64_t alloc_miss_too_large;
// Cache fill details:
uint64_t cache_fill_count;
uint64_t cache_fill_hits;
uint64_t cache_fill_misses;
uint64_t cache_fill_bucket_full;
uint64_t cache_fill_too_large;
// Memory cost:
uint64_t bucket_total_memory;
uint64_t metadata_overhead;
};
// Global registry of all ThreadCache instances.
//
// This class cannot allocate in the (Un)registerThreadCache() functions, as
// they are called from the ThreadCache constructor, which itself runs from
// within the allocator. The other member functions may allocate, however.
class BASE_EXPORT ThreadCacheRegistry {
public:
static ThreadCacheRegistry& Instance();
~ThreadCacheRegistry() = delete;
void RegisterThreadCache(ThreadCache* cache);
void UnregisterThreadCache(ThreadCache* cache);
  // Dumps statistics into |stats|, either for all thread caches or only the
  // current thread's.
void DumpStats(bool my_thread_only, ThreadCacheStats* stats);
static Lock& GetLock() { return Instance().lock_; }
private:
friend class NoDestructor<ThreadCacheRegistry>;
ThreadCacheRegistry();
Lock lock_;
ThreadCache* list_head_ GUARDED_BY(GetLock()) = nullptr;
};
// Optional statistics collection.
#if DCHECK_IS_ON()
#define PA_ENABLE_THREAD_CACHE_STATISTICS 1
#endif
#if defined(PA_ENABLE_THREAD_CACHE_STATISTICS)
#define INCREMENT_COUNTER(counter) ++counter
#define GET_COUNTER(counter) counter
#else
#define INCREMENT_COUNTER(counter) \
do { \
} while (0)
#define GET_COUNTER(counter) 0
#endif // defined(PA_ENABLE_THREAD_CACHE_STATISTICS)
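The INCREMENT_COUNTER/GET_COUNTER pair above compiles the accounting out entirely when statistics are disabled. Below is a self-contained sketch of the same pattern; the MY_-prefixed names are hypothetical stand-ins, not part of this CL.

#include <cstdint>
#include <cstdio>

#define MY_STATS_ENABLED 1  // Stand-in for PA_ENABLE_THREAD_CACHE_STATISTICS.

#if MY_STATS_ENABLED
#define MY_INCREMENT_COUNTER(counter) ++(counter)
#define MY_GET_COUNTER(counter) (counter)
#else
// Empty statement that still requires a trailing semicolon at the call site.
#define MY_INCREMENT_COUNTER(counter) \
  do {                                \
  } while (0)
#define MY_GET_COUNTER(counter) 0
#endif

int main() {
  uint64_t hits = 0;
  MY_INCREMENT_COUNTER(hits);  // Compiles away when MY_STATS_ENABLED is 0.
  std::printf("hits = %llu\n",
              static_cast<unsigned long long>(MY_GET_COUNTER(hits)));
  return 0;
}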
// Per-thread cache. *Not* threadsafe, must only be accessed from a single
// thread.
//
@@ -44,6 +108,7 @@ class BASE_EXPORT ThreadCache {
  static ThreadCache* Get() {
    return reinterpret_cast<ThreadCache*>(PartitionTlsGet(g_thread_cache_key));
  }

  // Create a new ThreadCache associated with |root|.
  // Must be called without the partition locked, as this may allocate.
  static ThreadCache* Create(PartitionRoot<ThreadSafe>* root);
@@ -79,13 +144,14 @@ class BASE_EXPORT ThreadCache {
  // Empties the cache.
  // The Partition lock must *not* be held when calling this.
  void Purge();
void AccumulateStats(ThreadCacheStats* stats) const;
  size_t bucket_count_for_testing(size_t index) const {
    return buckets_[index].count;
  }
 private:
  explicit ThreadCache(PartitionRoot<ThreadSafe>* root);
  struct Bucket {
    size_t count;
@@ -103,20 +169,39 @@ class BASE_EXPORT ThreadCache {
  static constexpr size_t kMaxCountPerBucket = 100;

  Bucket buckets_[kBucketCount];
ThreadCacheStats stats_;
PartitionRoot<ThreadSafe>* root_;
// Intrusive list since ThreadCacheRegistry::RegisterThreadCache() cannot
// allocate.
ThreadCache* next_ GUARDED_BY(ThreadCacheRegistry::GetLock());
ThreadCache* prev_ GUARDED_BY(ThreadCacheRegistry::GetLock());
friend class ThreadCacheRegistry;
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, LargeAllocationsAreNotCached);
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, MultipleThreadCaches);
FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, RecordStats);
FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, ThreadCacheRegistry);
FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, MultipleThreadCachesAccounting);
};

ALWAYS_INLINE bool ThreadCache::MaybePutInCache(void* address,
                                                size_t bucket_index) {
  INCREMENT_COUNTER(stats_.cache_fill_count);

  if (bucket_index >= kBucketCount) {
    INCREMENT_COUNTER(stats_.cache_fill_too_large);
    INCREMENT_COUNTER(stats_.cache_fill_misses);
    return false;
  }

  auto& bucket = buckets_[bucket_index];

  if (bucket.count >= kMaxCountPerBucket) {
    INCREMENT_COUNTER(stats_.cache_fill_bucket_full);
    INCREMENT_COUNTER(stats_.cache_fill_misses);
    return false;
  }

  PA_DCHECK(bucket.count != 0 || bucket.freelist_head == nullptr);
@@ -124,26 +209,37 @@ ALWAYS_INLINE bool ThreadCache::MaybePutInCache(void* address,
  entry->next = PartitionFreelistEntry::Encode(bucket.freelist_head);
  bucket.freelist_head = entry;
  bucket.count++;
INCREMENT_COUNTER(stats_.cache_fill_hits);
  return true;
}

ALWAYS_INLINE void* ThreadCache::GetFromCache(size_t bucket_index) {
INCREMENT_COUNTER(stats_.alloc_count);
  // Only handle "small" allocations.
  if (bucket_index >= kBucketCount) {
INCREMENT_COUNTER(stats_.alloc_miss_too_large);
INCREMENT_COUNTER(stats_.alloc_misses);
    return nullptr;
}
  auto& bucket = buckets_[bucket_index];
  auto* result = bucket.freelist_head;
  if (!result) {
    PA_DCHECK(bucket.count == 0);
INCREMENT_COUNTER(stats_.alloc_miss_empty);
INCREMENT_COUNTER(stats_.alloc_misses);
    return nullptr;
  }

  PA_DCHECK(bucket.count != 0);
  auto* next = EncodedPartitionFreelistEntry::Decode(result->next);
  PA_DCHECK(result != next);
  bucket.count--;
  PA_DCHECK(bucket.count != 0 || !next);
  bucket.freelist_head = next;
INCREMENT_COUNTER(stats_.alloc_hits);
  return result;
}
...
@@ -42,6 +42,18 @@ class LambdaThreadDelegate : public PlatformThread::Delegate {
  OnceClosure f_;
};
class DeltaCounter {
public:
explicit DeltaCounter(uint64_t& value)
: current_value_(value), initial_value_(value) {}
void Reset() { initial_value_ = current_value_; }
uint64_t Delta() const { return current_value_ - initial_value_; }
private:
uint64_t& current_value_;
uint64_t initial_value_;
};
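DeltaCounter exists because the stats_ counters are cumulative over a thread cache's lifetime, so the tests below assert on deltas rather than absolute values. A self-contained sketch of the pattern follows; the class body mirrors the one above, and main() is purely illustrative.

#include <cassert>
#include <cstdint>

class DeltaCounter {
 public:
  explicit DeltaCounter(uint64_t& value)
      : current_value_(value), initial_value_(value) {}
  void Reset() { initial_value_ = current_value_; }
  uint64_t Delta() const { return current_value_ - initial_value_; }

 private:
  uint64_t& current_value_;
  uint64_t initial_value_;
};

int main() {
  uint64_t alloc_count = 42;    // A counter with prior history.
  DeltaCounter delta{alloc_count};
  alloc_count += 2;             // The operation under test.
  assert(delta.Delta() == 2);   // Only the change since binding is counted.
  delta.Reset();                // Rebaseline for the next measurement.
  assert(delta.Delta() == 0);
  return 0;
}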
// Need to be a global object without a destructor, because the cache is a
// global object with a destructor (to handle thread destruction), and the
// PartitionRoot has to outlive it.
@@ -68,9 +80,13 @@ size_t FillThreadCacheAndReturnIndex(size_t size, size_t count = 1) {
class ThreadCacheTest : public ::testing::Test {
 protected:
  void SetUp() override {
// Make sure there is a thread cache.
void* data = g_root->Alloc(1, "");
g_root->Free(data);
    auto* tcache = g_root->thread_cache_for_testing();
    ASSERT_TRUE(tcache);
    tcache->Purge();
  }

  void TearDown() override {}
};

@@ -154,16 +170,27 @@ TEST_F(ThreadCacheTest, NoCrossPartitionCache) {
  EXPECT_EQ(1u, tcache->bucket_count_for_testing(bucket_index));
}
#if defined(PA_ENABLE_THREAD_CACHE_STATISTICS)  // Required to record hits and
                                                // misses.
TEST_F(ThreadCacheTest, LargeAllocationsAreNotCached) {
  auto* tcache = g_root->thread_cache_for_testing();
  DeltaCounter alloc_miss_counter{tcache->stats_.alloc_misses};
DeltaCounter alloc_miss_too_large_counter{
tcache->stats_.alloc_miss_too_large};
DeltaCounter cache_fill_counter{tcache->stats_.cache_fill_count};
DeltaCounter cache_fill_misses_counter{tcache->stats_.cache_fill_misses};
DeltaCounter cache_fill_too_large_counter{
tcache->stats_.cache_fill_too_large};
  FillThreadCacheAndReturnIndex(100 * 1024);
  tcache = g_root->thread_cache_for_testing();
  EXPECT_EQ(1u, alloc_miss_counter.Delta());
EXPECT_EQ(1u, alloc_miss_too_large_counter.Delta());
EXPECT_EQ(1u, cache_fill_counter.Delta());
EXPECT_EQ(1u, cache_fill_misses_counter.Delta());
EXPECT_EQ(1u, cache_fill_too_large_counter.Delta());
}
#endif  // defined(PA_ENABLE_THREAD_CACHE_STATISTICS)
TEST_F(ThreadCacheTest, DirectMappedAllocationsAreNotCached) {
  FillThreadCacheAndReturnIndex(1024 * 1024);
@@ -217,6 +244,106 @@ TEST_F(ThreadCacheTest, ThreadCacheReclaimedWhenThreadExits) {
  g_root->Free(tmp);
}
TEST_F(ThreadCacheTest, ThreadCacheRegistry) {
const size_t kTestSize = 100;
auto* parent_thread_tcache = g_root->thread_cache_for_testing();
ASSERT_TRUE(parent_thread_tcache);
LambdaThreadDelegate delegate{BindLambdaForTesting([&]() {
EXPECT_FALSE(g_root->thread_cache_for_testing()); // No allocations yet.
FillThreadCacheAndReturnIndex(kTestSize);
auto* tcache = g_root->thread_cache_for_testing();
EXPECT_TRUE(tcache);
AutoLock lock(ThreadCacheRegistry::GetLock());
EXPECT_EQ(tcache->prev_, nullptr);
EXPECT_EQ(tcache->next_, parent_thread_tcache);
})};
PlatformThreadHandle thread_handle;
PlatformThread::Create(0, &delegate, &thread_handle);
PlatformThread::Join(thread_handle);
AutoLock lock(ThreadCacheRegistry::GetLock());
EXPECT_EQ(parent_thread_tcache->prev_, nullptr);
EXPECT_EQ(parent_thread_tcache->next_, nullptr);
}
#if defined(PA_ENABLE_THREAD_CACHE_STATISTICS)
TEST_F(ThreadCacheTest, RecordStats) {
const size_t kTestSize = 100;
auto* tcache = g_root->thread_cache_for_testing();
DeltaCounter alloc_counter{tcache->stats_.alloc_count};
DeltaCounter alloc_hits_counter{tcache->stats_.alloc_hits};
DeltaCounter alloc_miss_counter{tcache->stats_.alloc_misses};
DeltaCounter alloc_miss_empty_counter{tcache->stats_.alloc_miss_empty};
DeltaCounter cache_fill_counter{tcache->stats_.cache_fill_count};
DeltaCounter cache_fill_hits_counter{tcache->stats_.cache_fill_hits};
DeltaCounter cache_fill_misses_counter{tcache->stats_.cache_fill_misses};
DeltaCounter cache_fill_bucket_full_counter{
tcache->stats_.cache_fill_bucket_full};
// Cache has been purged, first allocation is a miss.
void* data = g_root->Alloc(kTestSize, "");
EXPECT_EQ(1u, alloc_counter.Delta());
EXPECT_EQ(1u, alloc_miss_counter.Delta());
EXPECT_EQ(0u, alloc_hits_counter.Delta());
// Cache fill worked.
g_root->Free(data);
EXPECT_EQ(1u, cache_fill_counter.Delta());
EXPECT_EQ(1u, cache_fill_hits_counter.Delta());
EXPECT_EQ(0u, cache_fill_misses_counter.Delta());
tcache->Purge();
cache_fill_counter.Reset();
// Bucket full accounting.
size_t bucket_index = FillThreadCacheAndReturnIndex(
kTestSize, ThreadCache::kMaxCountPerBucket + 10);
EXPECT_EQ(ThreadCache::kMaxCountPerBucket + 10, cache_fill_counter.Delta());
EXPECT_EQ(10u, cache_fill_bucket_full_counter.Delta());
EXPECT_EQ(10u, cache_fill_misses_counter.Delta());
// Memory footprint.
size_t allocated_size = g_root->buckets[bucket_index].slot_size;
ThreadCacheStats stats;
ThreadCacheRegistry::Instance().DumpStats(true, &stats);
EXPECT_EQ(allocated_size * ThreadCache::kMaxCountPerBucket,
stats.bucket_total_memory);
EXPECT_EQ(sizeof(ThreadCache), stats.metadata_overhead);
}
TEST_F(ThreadCacheTest, MultipleThreadCachesAccounting) {
const size_t kTestSize = 100;
void* data = g_root->Alloc(kTestSize, "");
g_root->Free(data);
uint64_t alloc_count = g_root->thread_cache_for_testing()->stats_.alloc_count;
LambdaThreadDelegate delegate{BindLambdaForTesting([&]() {
EXPECT_FALSE(g_root->thread_cache_for_testing()); // No allocations yet.
size_t bucket_index = FillThreadCacheAndReturnIndex(kTestSize);
ThreadCacheStats stats;
ThreadCacheRegistry::Instance().DumpStats(false, &stats);
size_t allocated_size = g_root->buckets[bucket_index].slot_size;
// 2* for this thread and the parent one.
EXPECT_EQ(2 * allocated_size, stats.bucket_total_memory);
EXPECT_EQ(2 * sizeof(ThreadCache), stats.metadata_overhead);
uint64_t this_thread_alloc_count =
g_root->thread_cache_for_testing()->stats_.alloc_count;
EXPECT_EQ(alloc_count + this_thread_alloc_count, stats.alloc_count);
})};
PlatformThreadHandle thread_handle;
PlatformThread::Create(0, &delegate, &thread_handle);
PlatformThread::Join(thread_handle);
}
#endif // defined(PA_ENABLE_THREAD_CACHE_STATISTICS)
}  // namespace internal
}  // namespace base
...
@@ -23,6 +23,33 @@ std::string GetPartitionDumpName(const char* partition_name) {
                            kPartitionsDumpName, partition_name);
}
void ThreadCacheDump(base::trace_event::MemoryAllocatorDump* thread_cache_dump,
const base::internal::ThreadCacheStats& stats) {
thread_cache_dump->AddScalar("alloc_count", "scalar", stats.alloc_count);
thread_cache_dump->AddScalar("alloc_hits", "scalar", stats.alloc_hits);
thread_cache_dump->AddScalar("alloc_misses", "scalar", stats.alloc_misses);
thread_cache_dump->AddScalar("alloc_miss_empty", "scalar",
stats.alloc_miss_empty);
thread_cache_dump->AddScalar("alloc_miss_too_large", "scalar",
stats.alloc_miss_too_large);
thread_cache_dump->AddScalar("cache_fill_count", "scalar",
stats.cache_fill_count);
thread_cache_dump->AddScalar("cache_fill_hits", "scalar",
stats.cache_fill_hits);
thread_cache_dump->AddScalar("cache_fill_misses", "scalar",
stats.cache_fill_misses);
thread_cache_dump->AddScalar("cache_fill_bucket_full", "scalar",
stats.cache_fill_bucket_full);
thread_cache_dump->AddScalar("cache_fill_too_large", "scalar",
stats.cache_fill_too_large);
thread_cache_dump->AddScalar("size", "bytes", stats.bucket_total_memory);
thread_cache_dump->AddScalar("metadata_overhead", "bytes",
stats.metadata_overhead);
}
// This class is used to invert the dependency of PartitionAlloc on the
// PartitionAllocMemoryDumpProvider. This implements an interface that will
// be called with memory statistics for each bucket in the allocator.
@@ -71,6 +98,17 @@ void PartitionStatsDumperImpl::PartitionDumpTotals(
                            memory_stats->total_decommittable_bytes);
  allocator_dump->AddScalar("discardable_size", "bytes",
                            memory_stats->total_discardable_bytes);
if (memory_stats->has_thread_cache) {
const auto& thread_cache_stats = memory_stats->current_thread_cache_stats;
auto* thread_cache_dump = memory_dump_->CreateAllocatorDump(
dump_name + "/thread_cache/main_thread");
ThreadCacheDump(thread_cache_dump, thread_cache_stats);
const auto& all_thread_caches_stats = memory_stats->all_thread_caches_stats;
auto* all_thread_caches_dump = memory_dump_->CreateAllocatorDump(
dump_name + "/thread_cache/all_threads");
ThreadCacheDump(all_thread_caches_dump, all_thread_caches_stats);
}
}

void PartitionStatsDumperImpl::PartitionsDumpBucketStats(
@@ -79,10 +117,10 @@ void PartitionStatsDumperImpl::PartitionsDumpBucketStats(
  DCHECK(memory_stats->is_valid);
  std::string dump_name = GetPartitionDumpName(partition_name);
  if (memory_stats->is_direct_map) {
    dump_name.append(base::StringPrintf("/buckets/directMap_%" PRIu64, ++uid_));
  } else {
    dump_name.append(base::StringPrintf("/buckets/bucket_%" PRIu32,
                                        memory_stats->bucket_slot_size));
  }

  base::trace_event::MemoryAllocatorDump* allocator_dump =
...