Commit c2718dca authored by Benoit Lize, committed by Commit Bot

[base/allocator] Make constants in PartitionRoot constexpr.

PartitionRoot precomputes constants that are needed in the hot path to
save branches. These constants are the same across all PartitionRoots,
and they can be known at compile time.

This CL precomputes them at compile time, which saves ~1KiB per
PartitionRoot and should improve cache locality, as multiple partitions
are in use at any given time.

Bug: 998048
Change-Id: I36ff92b8df684c7788a1ffee7cfb160d3e4cc98b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2375248
Commit-Queue: Benoit L <lizeb@chromium.org>
Reviewed-by: Bartek Nowierski <bartekn@chromium.org>
Cr-Commit-Position: refs/heads/master@{#804886}
parent c659a430
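The change, in miniature (an illustrative sketch, not code from this CL; names and values are simplified stand-ins): a per-instance table that Init() filled at runtime becomes a single constexpr table shared by every root. Two 65-entry size_t tables account for the ~1KiB per-root saving on 64-bit (2 * 65 * 8 = 1040 bytes).

#include <cstddef>
#include <cstdio>

constexpr size_t kTableSize = 65;  // kBitsPerSizeT + 1 on a 64-bit build.

// Before: every root carries its own copies, filled by a loop in Init().
struct RootBefore {
  size_t order_index_shifts[kTableSize] = {};
  size_t order_sub_index_masks[kTableSize] = {};
};

// After: one read-only table in .rodata, shared by all roots. The shift
// rule mirrors OrderIndexShift() in the diff below, assuming
// kNumBucketsPerOrderBits == 3 (implied by "next three MSB" in the comments).
constexpr size_t Shift(size_t order) {
  return order < 4 ? 0 : order - 4;
}
constexpr size_t kShifts[kTableSize] = {Shift(0), Shift(1), Shift(2),
                                        Shift(3), Shift(4), Shift(5)
                                        /* ..., Shift(64) */};

struct RootAfter {};  // No per-instance tables at all.

int main() {
  std::printf("per-root saving: about %zu bytes\n",
              sizeof(RootBefore) - sizeof(RootAfter));  // ~1040.
}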
@@ -228,33 +228,6 @@ void PartitionRoot<thread_safe>::Init(bool enforce_alignment) {
// This is a "magic" value so we can test if a root pointer is valid.
inverted_self = ~reinterpret_cast<uintptr_t>(this);
// Precalculate some shift and mask constants used in the hot path.
// Example: malloc(41) == 101001 binary.
// Order is 6: (1 << (6 - 1)) == 32 is the highest set bit.
// order_index is the next three MSB == 010 == 2.
// sub_order_index_mask is a mask for the remaining bits == 11 (masking to 01
// for the sub_order_index).
size_t order;
for (order = 0; order <= kBitsPerSizeT; ++order) {
size_t order_index_shift;
if (order < kNumBucketsPerOrderBits + 1)
order_index_shift = 0;
else
order_index_shift = order - (kNumBucketsPerOrderBits + 1);
order_index_shifts[order] = order_index_shift;
size_t sub_order_index_mask;
if (order == kBitsPerSizeT) {
// This avoids invoking undefined behavior for an excessive shift.
sub_order_index_mask =
static_cast<size_t>(-1) >> (kNumBucketsPerOrderBits + 1);
} else {
sub_order_index_mask = ((static_cast<size_t>(1) << order) - 1) >>
(kNumBucketsPerOrderBits + 1);
}
order_sub_index_masks[order] = sub_order_index_mask;
}
// Set up the actual usable buckets first.
// Note that typical values (i.e. min allocation size of 8) will result in
// pseudo buckets (size==9 etc. or more generally, size is not a multiple
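As a standalone check of the malloc(41) example in the removed comment above (a sketch, not part of this CL; it assumes kNumBucketsPerOrderBits == 3, which the "next three MSB" wording implies):

#include <cstddef>
#include <cstdio>

int main() {
  constexpr size_t kNumBucketsPerOrderBits = 3;  // Assumed value.
  const size_t size = 41;  // 0b101001.
  // Order: position of the highest set bit, plus one.
  size_t order = 0;
  for (size_t s = size; s != 0; s >>= 1)
    ++order;  // order == 6.
  // order_index: the kNumBucketsPerOrderBits bits after the leading one.
  const size_t shift = order - (kNumBucketsPerOrderBits + 1);  // == 2.
  const size_t order_index =
      (size >> shift) & ((size_t{1} << kNumBucketsPerOrderBits) - 1);
  // sub_order_index: whatever is left below those bits.
  const size_t mask = ((size_t{1} << order) - 1) >>
                      (kNumBucketsPerOrderBits + 1);  // == 0b11.
  std::printf("order=%zu order_index=%zu sub_order_index=%zu\n", order,
              order_index, size & mask);  // order=6 order_index=2 sub=1.
}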
@@ -282,7 +255,7 @@ void PartitionRoot<thread_safe>::Init(bool enforce_alignment) {
// Then set up the fast size -> bucket lookup table.
bucket = &buckets[0];
Bucket** bucket_ptr = &bucket_lookups[0];
-  for (order = 0; order <= kBitsPerSizeT; ++order) {
+  for (size_t order = 0; order <= kBitsPerSizeT; ++order) {
for (j = 0; j < kNumBucketsPerOrder; ++j) {
if (order < kMinBucketedOrder) {
// Use the bucket of the finest granularity for malloc(0) etc.
@@ -265,6 +265,92 @@ class BASE_EXPORT PartitionStatsDumper {
const PartitionBucketMemoryStats*) = 0;
};
namespace {
// Precalculate some shift and mask constants used in the hot path.
// Example: malloc(41) == 101001 binary.
// Order is 6: (1 << (6 - 1)) == 32 is the highest set bit.
// order_index is the next three MSB == 010 == 2.
// sub_order_index_mask is a mask for the remaining bits == 11 (masking to 01
// for the sub_order_index).
constexpr size_t OrderIndexShift(size_t order) {
if (order < kNumBucketsPerOrderBits + 1)
return 0;
return order - (kNumBucketsPerOrderBits + 1);
}
constexpr size_t OrderSubIndexMask(size_t order) {
if (order == kBitsPerSizeT)
return static_cast<size_t>(-1) >> (kNumBucketsPerOrderBits + 1);
return ((static_cast<size_t>(1) << order) - 1) >>
(kNumBucketsPerOrderBits + 1);
}
#if defined(ARCH_CPU_64_BITS) && !defined(OS_NACL)
#define BITS_PER_SIZE_T 64
static_assert(kBitsPerSizeT == 64, "");
#else
#define BITS_PER_SIZE_T 32
static_assert(kBitsPerSizeT == 32, "");
#endif
constexpr size_t kOrderIndexShift[BITS_PER_SIZE_T + 1] = {
OrderIndexShift(0), OrderIndexShift(1), OrderIndexShift(2),
OrderIndexShift(3), OrderIndexShift(4), OrderIndexShift(5),
OrderIndexShift(6), OrderIndexShift(7), OrderIndexShift(8),
OrderIndexShift(9), OrderIndexShift(10), OrderIndexShift(11),
OrderIndexShift(12), OrderIndexShift(13), OrderIndexShift(14),
OrderIndexShift(15), OrderIndexShift(16), OrderIndexShift(17),
OrderIndexShift(18), OrderIndexShift(19), OrderIndexShift(20),
OrderIndexShift(21), OrderIndexShift(22), OrderIndexShift(23),
OrderIndexShift(24), OrderIndexShift(25), OrderIndexShift(26),
OrderIndexShift(27), OrderIndexShift(28), OrderIndexShift(29),
OrderIndexShift(30), OrderIndexShift(31), OrderIndexShift(32),
#if BITS_PER_SIZE_T == 64
OrderIndexShift(33), OrderIndexShift(34), OrderIndexShift(35),
OrderIndexShift(36), OrderIndexShift(37), OrderIndexShift(38),
OrderIndexShift(39), OrderIndexShift(40), OrderIndexShift(41),
OrderIndexShift(42), OrderIndexShift(43), OrderIndexShift(44),
OrderIndexShift(45), OrderIndexShift(46), OrderIndexShift(47),
OrderIndexShift(48), OrderIndexShift(49), OrderIndexShift(50),
OrderIndexShift(51), OrderIndexShift(52), OrderIndexShift(53),
OrderIndexShift(54), OrderIndexShift(55), OrderIndexShift(56),
OrderIndexShift(57), OrderIndexShift(58), OrderIndexShift(59),
OrderIndexShift(60), OrderIndexShift(61), OrderIndexShift(62),
OrderIndexShift(63), OrderIndexShift(64)
#endif
};
constexpr size_t kOrderSubIndexMask[BITS_PER_SIZE_T + 1] = {
OrderSubIndexMask(0), OrderSubIndexMask(1), OrderSubIndexMask(2),
OrderSubIndexMask(3), OrderSubIndexMask(4), OrderSubIndexMask(5),
OrderSubIndexMask(6), OrderSubIndexMask(7), OrderSubIndexMask(8),
OrderSubIndexMask(9), OrderSubIndexMask(10), OrderSubIndexMask(11),
OrderSubIndexMask(12), OrderSubIndexMask(13), OrderSubIndexMask(14),
OrderSubIndexMask(15), OrderSubIndexMask(16), OrderSubIndexMask(17),
OrderSubIndexMask(18), OrderSubIndexMask(19), OrderSubIndexMask(20),
OrderSubIndexMask(21), OrderSubIndexMask(22), OrderSubIndexMask(23),
OrderSubIndexMask(24), OrderSubIndexMask(25), OrderSubIndexMask(26),
OrderSubIndexMask(27), OrderSubIndexMask(28), OrderSubIndexMask(29),
OrderSubIndexMask(30), OrderSubIndexMask(31), OrderSubIndexMask(32),
#if BITS_PER_SIZE_T == 64
OrderSubIndexMask(33), OrderSubIndexMask(34), OrderSubIndexMask(35),
OrderSubIndexMask(36), OrderSubIndexMask(37), OrderSubIndexMask(38),
OrderSubIndexMask(39), OrderSubIndexMask(40), OrderSubIndexMask(41),
OrderSubIndexMask(42), OrderSubIndexMask(43), OrderSubIndexMask(44),
OrderSubIndexMask(45), OrderSubIndexMask(46), OrderSubIndexMask(47),
OrderSubIndexMask(48), OrderSubIndexMask(49), OrderSubIndexMask(50),
OrderSubIndexMask(51), OrderSubIndexMask(52), OrderSubIndexMask(53),
OrderSubIndexMask(54), OrderSubIndexMask(55), OrderSubIndexMask(56),
OrderSubIndexMask(57), OrderSubIndexMask(58), OrderSubIndexMask(59),
OrderSubIndexMask(60), OrderSubIndexMask(61), OrderSubIndexMask(62),
OrderSubIndexMask(63), OrderSubIndexMask(64)
#endif
};
} // namespace
// Never instantiate a PartitionRoot directly, instead use
// PartitionAllocator.
template <bool thread_safe>
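An aside on the 65-entry tables above: they are written out by hand, presumably because Chromium still built as C++14 at the time, where a constexpr array cannot be filled by a loop (std::array's mutating operator[] only became constexpr in C++17). Under C++17 the same tables could be generated; a sketch, with MakeTable as a hypothetical helper and assumed constant values:

#include <array>
#include <cstddef>

constexpr size_t kNumBucketsPerOrderBits = 3;  // Assumed value.
constexpr size_t kBitsPerSizeT = 64;           // 64-bit build.

constexpr size_t OrderIndexShift(size_t order) {
  return order < kNumBucketsPerOrderBits + 1
             ? 0
             : order - (kNumBucketsPerOrderBits + 1);
}

// Hypothetical helper: evaluates f(0..N-1) at compile time.
template <size_t N, typename F>
constexpr std::array<size_t, N> MakeTable(F f) {
  std::array<size_t, N> table{};
  for (size_t i = 0; i < N; ++i)
    table[i] = f(i);
  return table;
}

constexpr auto kOrderIndexShift =
    MakeTable<kBitsPerSizeT + 1>(OrderIndexShift);
static_assert(kOrderIndexShift[6] == 2, "matches the malloc(41) example");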
@@ -304,9 +390,6 @@ struct BASE_EXPORT PartitionRoot {
char* next_tag_bitmap_page = nullptr;
#endif
// Some pre-computed constants.
size_t order_index_shifts[kBitsPerSizeT + 1] = {};
size_t order_sub_index_masks[kBitsPerSizeT + 1] = {};
// The bucket lookup table lets us map a size_t to a bucket quickly.
// The trailing +1 caters for the overflow case for very large allocation
// sizes. It is one flat array instead of a 2D array because in the 2D
@@ -696,9 +779,9 @@ PartitionRoot<thread_safe>::SizeToBucket(size_t size) const {
size_t order = kBitsPerSizeT - bits::CountLeadingZeroBitsSizeT(size);
// The order index is simply the next few bits after the most significant bit.
size_t order_index =
-      (size >> order_index_shifts[order]) & (kNumBucketsPerOrder - 1);
+      (size >> kOrderIndexShift[order]) & (kNumBucketsPerOrder - 1);
// And if the remaining bits are non-zero we must bump the bucket up.
-  size_t sub_order_index = size & order_sub_index_masks[order];
+  size_t sub_order_index = size & kOrderSubIndexMask[order];
Bucket* bucket = bucket_lookups[(order << kNumBucketsPerOrderBits) +
order_index + !!sub_order_index];
PA_CHECK(bucket);
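Tying the example together: for malloc(41), SizeToBucket combines order 6, order_index 2 and the non-zero sub_order_index into a single bucket_lookups index. A sketch of just that arithmetic (assuming kNumBucketsPerOrderBits == 3; __builtin_clzll is a GCC/Clang-specific stand-in for bits::CountLeadingZeroBitsSizeT):

#include <cstddef>
#include <cstdio>

int main() {
  constexpr size_t kNumBucketsPerOrderBits = 3;  // Assumed value.
  const size_t size = 41;
  const size_t order = 64 - __builtin_clzll(size);  // == 6.
  const size_t order_index = 2;      // Computed as in the earlier sketch.
  const size_t sub_order_index = 1;  // Non-zero leftover bits: round up.
  const size_t index = (order << kNumBucketsPerOrderBits) + order_index +
                       !!sub_order_index;  // (6 << 3) + 2 + 1 == 51.
  std::printf("bucket_lookups index = %zu\n", index);
}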