Commit 47a8282a authored by Benoit Lize, committed by Commit Bot

[PartitionAlloc] Use the thread cache in perftests.

On gLinux, this shows that with the thread cache, for tests that hit the
cache:
- PartitionAlloc is ~2x faster with the thread cache on a *single* thread
- The thread cache makes PartitionAlloc an order of magnitude faster at
  least with 4 threads (on a machine with 20 physical cores)
- It is competitive with the System allocator (glibc) no matter the
  number of threads.

See below:
1. One thread:
[ RUN      ] MemoryAllocationPerfTest.MultiBucketWithFree/0
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_System_1_total= 49925308 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_System_1_total= 20 ns
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_System_1_worst= 49925308 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_System_1_worst= 20 ns
[       OK ] MemoryAllocationPerfTest.MultiBucketWithFree/0 (2039 ms)
[ RUN      ] MemoryAllocationPerfTest.MultiBucketWithFree/1
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_PartitionAlloc_1_total= 25700384 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_PartitionAlloc_1_total= 38 ns
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_PartitionAlloc_1_worst= 25700384 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_PartitionAlloc_1_worst= 38 ns
[       OK ] MemoryAllocationPerfTest.MultiBucketWithFree/1 (2063 ms)
[ RUN      ] MemoryAllocationPerfTest.MultiBucketWithFree/2
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_PartitionAllocWithThreadCache_1_total= 47524712 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_PartitionAllocWithThreadCache_1_total= 21 ns
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_PartitionAllocWithThreadCache_1_worst= 47524712 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_PartitionAllocWithThreadCache_1_worst= 21 ns

2. 4 threads
[ RUN      ] MemoryAllocationPerfTest.MultiBucketWithFree/9
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_System_4_total= 126134856 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_System_4_total= 7 ns
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_System_4_worst= 28002580 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_System_4_worst= 35 ns
[       OK ] MemoryAllocationPerfTest.MultiBucketWithFree/9 (2049 ms)
[ RUN      ] MemoryAllocationPerfTest.MultiBucketWithFree/10
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_PartitionAlloc_4_total= 2960527 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_PartitionAlloc_4_total= 337 ns
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_PartitionAlloc_4_worst= 699706 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_PartitionAlloc_4_worst= 1429 ns
[       OK ] MemoryAllocationPerfTest.MultiBucketWithFree/10 (3352 ms)
[ RUN      ] MemoryAllocationPerfTest.MultiBucketWithFree/11
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_PartitionAllocWithThreadCache_4_total= 95663528 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_PartitionAllocWithThreadCache_4_total= 10 ns
*RESULT MemoryAllocationthroughput: MemoryAllocation.MultiBucketWithFree_PartitionAllocWithThreadCache_4_worst= 21672892 runs/s
*RESULT MemoryAllocationtime_per_allocation: MemoryAllocation.MultiBucketWithFree_PartitionAllocWithThreadCache_4_worst= 46 ns

Bug: 998048
Change-Id: I13ac85714fae343871f0f448f0a36729e8ad4c20
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2491366
Commit-Queue: Benoit L <lizeb@chromium.org>
Reviewed-by: Yuki Shiino <yukishiino@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Cr-Commit-Position: refs/heads/master@{#820265}
parent 4dee8368
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "base/allocator/partition_allocator/partition_alloc.h" #include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/allocator/partition_allocator/partition_alloc_check.h" #include "base/allocator/partition_allocator/partition_alloc_check.h"
#include "base/allocator/partition_allocator/thread_cache.h"
#include "base/bind.h" #include "base/bind.h"
#include "base/callback.h" #include "base/callback.h"
#include "base/logging.h" #include "base/logging.h"
...@@ -56,7 +57,11 @@ perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) { ...@@ -56,7 +57,11 @@ perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) {
return reporter; return reporter;
} }
enum class AllocatorType { kSystem, kPartitionAlloc }; enum class AllocatorType {
kSystem,
kPartitionAlloc,
kPartitionAllocWithThreadCache
};
class Allocator { class Allocator {
public: public:
...@@ -89,6 +94,26 @@ class PartitionAllocator : public Allocator { ...@@ -89,6 +94,26 @@ class PartitionAllocator : public Allocator {
PartitionOptions::ThreadCache::kDisabled}}; PartitionOptions::ThreadCache::kDisabled}};
}; };
// The single thread-cache-enabled partition root. Only one partition may have
// a thread cache, so every PartitionAllocatorWithThreadCache instance shares
// this root. It is created on first use and never deleted by this class.
ThreadSafePartitionRoot* g_partition_root = nullptr;

// Allocator implementation that routes allocations through the shared,
// thread-cache-enabled partition root.
class PartitionAllocatorWithThreadCache : public Allocator {
 public:
  // Makes sure the shared root exists, then asks the thread cache registry to
  // purge all caches.
  // NOTE(review): the purge is presumably there so each test run starts from
  // empty thread caches; confirm against ThreadCacheRegistry.
  PartitionAllocatorWithThreadCache() {
    EnsureSharedRoot();
    internal::ThreadCacheRegistry::Instance().PurgeAll();
  }

  ~PartitionAllocatorWithThreadCache() override = default;

  // Allocates |size| bytes from the shared root with no allocation flags.
  void* Alloc(size_t size) override {
    return g_partition_root->AllocFlagsNoHooks(/*flags=*/0, size);
  }

  // Returns |data| to the partition it was allocated from.
  void Free(void* data) override {
    ThreadSafePartitionRoot::FreeNoHooks(data);
  }

 private:
  // Creates the shared root the first time it is needed; later calls are
  // no-ops.
  static void EnsureSharedRoot() {
    if (g_partition_root != nullptr)
      return;

    g_partition_root = new ThreadSafePartitionRoot(
        {PartitionOptions::Alignment::kRegular,
         PartitionOptions::ThreadCache::kEnabled});
  }
};
class TestLoopThread : public PlatformThread::Delegate { class TestLoopThread : public PlatformThread::Delegate {
public: public:
explicit TestLoopThread(OnceCallback<float()> test_fn) explicit TestLoopThread(OnceCallback<float()> test_fn)
...@@ -252,9 +277,14 @@ float MultiBucketWithFree(Allocator* allocator) { ...@@ -252,9 +277,14 @@ float MultiBucketWithFree(Allocator* allocator) {
} }
std::unique_ptr<Allocator> CreateAllocator(AllocatorType type) { std::unique_ptr<Allocator> CreateAllocator(AllocatorType type) {
if (type == AllocatorType::kSystem) switch (type) {
return std::make_unique<SystemAllocator>(); case AllocatorType::kSystem:
return std::make_unique<PartitionAllocator>(); return std::make_unique<SystemAllocator>();
case AllocatorType::kPartitionAlloc:
return std::make_unique<PartitionAllocator>();
case AllocatorType::kPartitionAllocWithThreadCache:
return std::make_unique<PartitionAllocatorWithThreadCache>();
}
} }
void LogResults(int thread_count, void LogResults(int thread_count,
...@@ -286,10 +316,22 @@ void RunTest(int thread_count, ...@@ -286,10 +316,22 @@ void RunTest(int thread_count,
total_laps_per_second += laps_per_second; total_laps_per_second += laps_per_second;
} }
std::string name = base::StringPrintf( char const* alloc_type_str;
"%s.%s_%s_%d", kMetricPrefixMemoryAllocation, story_base_name, switch (alloc_type) {
alloc_type == AllocatorType::kSystem ? "System" : "PartitionAlloc", case AllocatorType::kSystem:
thread_count); alloc_type_str = "System";
break;
case AllocatorType::kPartitionAlloc:
alloc_type_str = "PartitionAlloc";
break;
case AllocatorType::kPartitionAllocWithThreadCache:
alloc_type_str = "PartitionAllocWithThreadCache";
break;
}
std::string name =
base::StringPrintf("%s.%s_%s_%d", kMetricPrefixMemoryAllocation,
story_base_name, alloc_type_str, thread_count);
DisplayResults(name + "_total", total_laps_per_second); DisplayResults(name + "_total", total_laps_per_second);
DisplayResults(name + "_worst", min_laps_per_second); DisplayResults(name + "_worst", min_laps_per_second);
...@@ -300,12 +342,20 @@ void RunTest(int thread_count, ...@@ -300,12 +342,20 @@ void RunTest(int thread_count,
class MemoryAllocationPerfTest class MemoryAllocationPerfTest
: public testing::TestWithParam<std::tuple<int, AllocatorType>> {}; : public testing::TestWithParam<std::tuple<int, AllocatorType>> {};
// Only one partition with a thread cache: cannot use the thread cache when
// PartitionAlloc is malloc().
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
, ,
MemoryAllocationPerfTest, MemoryAllocationPerfTest,
::testing::Combine(::testing::Values(1, 2, 3, 4), ::testing::Combine(
::testing::Values(AllocatorType::kSystem, ::testing::Values(1, 2, 3, 4),
AllocatorType::kPartitionAlloc))); ::testing::Values(AllocatorType::kSystem,
AllocatorType::kPartitionAlloc
#if !BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
,
AllocatorType::kPartitionAllocWithThreadCache
#endif
)));
// This test (and the other one below) allocates a large amount of memory, which // This test (and the other one below) allocates a large amount of memory, which
// can cause issues on Android. // can cause issues on Android.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment