Commit f7757107 authored by Benoit Lize's avatar Benoit Lize Committed by Chromium LUCI CQ

[PartitionAlloc] Record and report batch fill rate.

The thread cache uses batched allocation from the central
allocator. To assess contention on the central allocator's main lock,
the number of batch fill requests is a better metric to track than the
cache hit rate.

This CL records this metric, and reports it in memory dumps and in UMA.

Bug: 998048
Change-Id: Ie6a833c7a0ece66d3c138757c8fb3f49a1690c7f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2593630
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Reviewed-by: ssid <ssid@chromium.org>
Reviewed-by: Jesse Doherty <jwd@chromium.org>
Commit-Queue: Benoit L <lizeb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#838015}
parent 41522c8a
......@@ -28,6 +28,8 @@ struct ThreadCacheStats {
uint64_t cache_fill_hits;
uint64_t cache_fill_misses; // Object too large.
uint64_t batch_fill_count; // Number of central allocator requests.
// Memory cost:
uint64_t bucket_total_memory;
uint64_t metadata_overhead;
......
......@@ -301,6 +301,8 @@ void ThreadCache::FillBucket(size_t bucket_index) {
// clearing which would greatly increase calls to the central allocator. (3)
// tries to keep memory usage low. So clearing half of the bucket, and filling
// a quarter of it are sensible defaults.
INCREMENT_COUNTER(stats_.batch_fill_count);
Bucket& bucket = buckets_[bucket_index];
int count = bucket.limit / kBatchFillRatio;
......@@ -388,6 +390,8 @@ void ThreadCache::ResetForTesting() {
stats_.cache_fill_hits = 0;
stats_.cache_fill_misses = 0;
stats_.batch_fill_count = 0;
stats_.bucket_total_memory = 0;
stats_.metadata_overhead = 0;
......@@ -407,9 +411,11 @@ void ThreadCache::AccumulateStats(ThreadCacheStats* stats) const {
stats->cache_fill_hits += stats_.cache_fill_hits;
stats->cache_fill_misses += stats_.cache_fill_misses;
for (size_t i = 0; i < kBucketCount; i++) {
stats->batch_fill_count += stats_.batch_fill_count;
for (const Bucket& bucket : buckets_) {
stats->bucket_total_memory +=
buckets_[i].count * static_cast<size_t>(buckets_[i].slot_size);
bucket.count * static_cast<size_t>(bucket.slot_size);
}
stats->metadata_overhead += sizeof(*this);
}
......
......@@ -257,6 +257,8 @@ class BASE_EXPORT ThreadCache {
friend class ThreadCacheRegistry;
friend class ThreadCacheTest;
FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, Simple);
FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, MultipleObjectsCachedPerBucket);
FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, LargeAllocationsAreNotCached);
FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, MultipleThreadCaches);
FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, RecordStats);
......
......@@ -121,12 +121,13 @@ class ThreadCacheTest : public ::testing::Test {
};
TEST_F(ThreadCacheTest, Simple) {
void* ptr = g_root->Alloc(kSmallSize, "");
ASSERT_TRUE(ptr);
// There is a cache.
auto* tcache = g_root->thread_cache_for_testing();
EXPECT_TRUE(tcache);
DeltaCounter batch_fill_counter{tcache->stats_.batch_fill_count};
void* ptr = g_root->Alloc(kSmallSize, "");
ASSERT_TRUE(ptr);
uint16_t index = PartitionRoot<ThreadSafe>::SizeToBucketIndex(kSmallSize);
EXPECT_EQ(kFillCountForSmallBucket - 1,
......@@ -141,6 +142,8 @@ TEST_F(ThreadCacheTest, Simple) {
// Allocated from the thread cache.
EXPECT_EQ(kFillCountForSmallBucket - 1,
tcache->bucket_count_for_testing(index));
EXPECT_EQ(1u, batch_fill_counter.Delta());
}
TEST_F(ThreadCacheTest, InexactSizeMatch) {
......@@ -167,11 +170,15 @@ TEST_F(ThreadCacheTest, InexactSizeMatch) {
}
TEST_F(ThreadCacheTest, MultipleObjectsCachedPerBucket) {
auto* tcache = g_root->thread_cache_for_testing();
DeltaCounter batch_fill_counter{tcache->stats_.batch_fill_count};
size_t bucket_index =
FillThreadCacheAndReturnIndex(kMediumSize, kFillCountForMediumBucket + 2);
auto* tcache = g_root->thread_cache_for_testing();
EXPECT_EQ(2 * kFillCountForMediumBucket,
tcache->bucket_count_for_testing(bucket_index));
// 2 batches, since there were more than |kFillCountForMediumBucket|
// allocations.
EXPECT_EQ(2u, batch_fill_counter.Delta());
}
TEST_F(ThreadCacheTest, ObjectsCachedCountIsLimited) {
......
......@@ -227,6 +227,8 @@ void ReportPartitionAllocThreadCacheStats(MemoryAllocatorDump* dump,
dump->AddScalar("cache_fill_hits", "scalar", stats.cache_fill_hits);
dump->AddScalar("cache_fill_misses", "scalar", stats.cache_fill_misses);
dump->AddScalar("batch_fill_count", "scalar", stats.batch_fill_count);
dump->AddScalar("size", "bytes", stats.bucket_total_memory);
dump->AddScalar("metadata_overhead", "bytes", stats.metadata_overhead);
}
......
......@@ -92,6 +92,13 @@ void PartitionStatsDumperImpl::PartitionDumpTotals(
all_thread_caches_stats.alloc_count);
base::UmaHistogramPercentage("Memory.PartitionAlloc.ThreadCache.HitRate",
hit_rate_percent);
int batch_fill_rate_percent =
static_cast<int>((100 * all_thread_caches_stats.batch_fill_count) /
all_thread_caches_stats.alloc_count);
base::UmaHistogramPercentage(
"Memory.PartitionAlloc.ThreadCache.BatchFillRate",
batch_fill_rate_percent);
}
if (thread_cache_stats.alloc_count) {
......@@ -101,6 +108,13 @@ void PartitionStatsDumperImpl::PartitionDumpTotals(
base::UmaHistogramPercentage(
"Memory.PartitionAlloc.ThreadCache.HitRate.MainThread",
hit_rate_percent);
int batch_fill_rate_percent =
static_cast<int>((100 * thread_cache_stats.batch_fill_count) /
thread_cache_stats.alloc_count);
base::UmaHistogramPercentage(
"Memory.PartitionAlloc.ThreadCache.BatchFillRate.MainThread",
batch_fill_rate_percent);
}
}
}
......
......@@ -30,11 +30,20 @@ TEST(PartitionAllocMemoryDumpProviderTest, Simple) {
1);
histogram_tester.ExpectTotalCount(
"Memory.PartitionAlloc.ThreadCache.HitRate.MainThread", 1);
histogram_tester.ExpectTotalCount(
"Memory.PartitionAlloc.ThreadCache.BatchFillRate", 1);
histogram_tester.ExpectTotalCount(
"Memory.PartitionAlloc.ThreadCache.HitRate.MainThread", 1);
#else
histogram_tester.ExpectTotalCount("Memory.PartitionAlloc.ThreadCache.HitRate",
0);
histogram_tester.ExpectTotalCount(
"Memory.PartitionAlloc.ThreadCache.HitRate.MainThread", 0);
histogram_tester.ExpectTotalCount(
"Memory.PartitionAlloc.ThreadCache.BatchFillRate", 0);
histogram_tester.ExpectTotalCount(
"Memory.PartitionAlloc.ThreadCache.BatchFillRate.MainThread", 0);
#endif // !BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC) &&
// defined(PA_THREAD_CACHE_SUPPORTED) &&
// !defined(MEMORY_TOOL_REPLACES_ALLOCATOR)
......
......@@ -1995,6 +1995,22 @@ reviews. Googlers can read more about this at go/gwsq-gerrit.
<summary>Throughput of a ParkableString disk write.</summary>
</histogram>
<histogram name="Memory.PartitionAlloc.ThreadCache.BatchFillRate{ThreadType}"
units="%" expires_after="M92">
<owner>lizeb@chromium.org</owner>
<owner>bartekn@chromium.org</owner>
<summary>
  Fraction of PartitionAlloc's thread cache allocation requests that
  required a batch fill, that is, cache misses that had to touch the
  central allocator. Recorded during memory dumps, at the same time as the
  Memory.*.PartitionAlloc.* histograms. Data is collected for
  {ThreadType}.
</summary>
<token key="ThreadType">
<variant name="" summary="all threads"/>
<variant name=".MainThread" summary="the main thread only"/>
</token>
</histogram>
<histogram name="Memory.PartitionAlloc.ThreadCache.HitRate{ThreadType}"
units="%" expires_after="M92">
<owner>lizeb@chromium.org</owner>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment