Commit e033cdf6 authored by Anton Bikineev, committed by Commit Bot

PartitionAlloc: Constant initialize global bucket index lookup table

This CL gets rid of dynamic initialization of the bucket index lookup
table and thereby avoids the data race that happens when the table is
reinitialized, i.e. when the malloc partition is allocated from while
another partition (e.g. wtf::) is being initialized.
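
For context, a minimal sketch of the pattern this CL adopts: a lookup table built inside a constexpr constructor is constant-initialized, so it is baked into the binary and never written at runtime. The names and the toy mapping below are illustrative only, not PartitionAlloc's actual types.

```cpp
// Minimal sketch (hypothetical names, not PartitionAlloc's actual code): a
// table filled in a constexpr constructor is constant-initialized, so there
// is no dynamic initializer and nothing to race on at startup.
#include <cstddef>
#include <cstdint>

class SizeClassTable {
 public:
  static constexpr size_t kEntries = 64;

  constexpr SizeClassTable() {
    // The loop runs in the constexpr evaluator, not at program startup.
    for (size_t i = 0; i < kEntries; ++i)
      entries_[i] = static_cast<uint16_t>(i / 4);  // toy mapping
  }

  constexpr uint16_t index_for(size_t slot) const { return entries_[slot]; }

 private:
  uint16_t entries_[kEntries]{};
};

// Constant-initialized global: no init-order or reinitialization races when
// several roots are set up concurrently.
constexpr SizeClassTable kTable{};

static_assert(kTable.index_for(8) == 2, "evaluated at compile time");
```

Because the table has no dynamic initializer, concurrent initialization of several partition roots can no longer race on it.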

Change-Id: Ie7ceffec71862e1a5a95433e36b925b8a0122d03
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2560625
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Reviewed-by: Benoit L <lizeb@chromium.org>
Commit-Queue: Anton Bikineev <bikineev@chromium.org>
Cr-Commit-Position: refs/heads/master@{#831236}
parent dd92a1f7
@@ -303,38 +303,6 @@ static void PartitionDumpBucketStats(
}
}
template <bool thread_safe>
void InitBucketIndexLookup(PartitionRoot<thread_safe>* root) {
uint16_t* bucket_index_ptr =
&PartitionRoot<thread_safe>::bucket_index_lookup[0];
uint16_t bucket_index = 0;
const uint16_t sentinel_bucket_index = kNumBuckets;
for (uint16_t order = 0; order <= kBitsPerSizeT; ++order) {
for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
if (order < kMinBucketedOrder) {
// Use the bucket of the finest granularity for malloc(0) etc.
*bucket_index_ptr++ = 0;
} else if (order > kMaxBucketedOrder) {
*bucket_index_ptr++ = sentinel_bucket_index;
} else {
uint16_t valid_bucket_index = bucket_index;
while (root->buckets[valid_bucket_index].slot_size % kSmallestBucket)
valid_bucket_index++;
*bucket_index_ptr++ = valid_bucket_index;
bucket_index++;
}
}
}
PA_DCHECK(bucket_index == kNumBuckets);
PA_DCHECK(bucket_index_ptr ==
PartitionRoot<thread_safe>::bucket_index_lookup +
((kBitsPerSizeT + 1) * kNumBucketsPerOrder));
// And there's one last bucket lookup that will be hit for e.g. malloc(-1),
// which tries to overflow to a non-existent order.
*bucket_index_ptr = sentinel_bucket_index;
}
} // namespace internal
// TODO(lizeb): Consider making this constexpr. Without C++17 std::array, this
@@ -438,11 +406,6 @@ void PartitionRoot<thread_safe>::Init(PartitionOptions opts) {
PA_DCHECK(current_size == 1 << kMaxBucketedOrder);
PA_DCHECK(bucket == &buckets[0] + kNumBuckets);
// Then set up the fast size -> bucket lookup table. We call this multiple
// times even though the indices are shared between all PartitionRoots; the
// operation is idempotent, so there is no harm.
internal::InitBucketIndexLookup(this);
#if !defined(PA_THREAD_CACHE_SUPPORTED)
// TLS in ThreadCache not supported on other OSes.
with_thread_cache = false;
......
@@ -494,6 +494,88 @@ constexpr size_t kOrderSubIndexMask[BITS_PER_SIZE_T + 1] = {
namespace internal {
// The class used to generate the bucket lookup table at compile-time.
class BucketIndexLookup final {
public:
ALWAYS_INLINE constexpr static size_t GetIndex(size_t size);
private:
constexpr BucketIndexLookup() {
constexpr uint16_t sentinel_bucket_index = kNumBuckets;
InitBucketSizes();
uint16_t* bucket_index_ptr = &bucket_index_lookup_[0];
uint16_t bucket_index = 0;
for (uint16_t order = 0; order <= kBitsPerSizeT; ++order) {
for (uint16_t j = 0; j < kNumBucketsPerOrder; ++j) {
if (order < kMinBucketedOrder) {
// Use the bucket of the finest granularity for malloc(0) etc.
*bucket_index_ptr++ = 0;
} else if (order > kMaxBucketedOrder) {
*bucket_index_ptr++ = sentinel_bucket_index;
} else {
uint16_t valid_bucket_index = bucket_index;
while (bucket_sizes_[valid_bucket_index] % kSmallestBucket)
valid_bucket_index++;
*bucket_index_ptr++ = valid_bucket_index;
bucket_index++;
}
}
}
PA_DCHECK(bucket_index == kNumBuckets);
PA_DCHECK(bucket_index_ptr == bucket_index_lookup_ + ((kBitsPerSizeT + 1) *
kNumBucketsPerOrder));
// And there's one last bucket lookup that will be hit for e.g. malloc(-1),
// which tries to overflow to a non-existent order.
*bucket_index_ptr = sentinel_bucket_index;
}
constexpr void InitBucketSizes() {
size_t current_size = kSmallestBucket;
size_t current_increment = kSmallestBucket >> kNumBucketsPerOrderBits;
size_t* bucket_size = &bucket_sizes_[0];
for (size_t i = 0; i < kNumBucketedOrders; ++i) {
for (size_t j = 0; j < kNumBucketsPerOrder; ++j) {
*bucket_size = current_size;
// Disable pseudo buckets so that touching them faults.
current_size += current_increment;
++bucket_size;
}
current_increment <<= 1;
}
}
size_t bucket_sizes_[kNumBuckets]{};
// The bucket lookup table lets us map a size_t to a bucket quickly.
// The trailing +1 caters for the overflow case for very large allocation
// sizes. It is one flat array instead of a 2D array because in the 2D
// world, we'd need to index array[blah][max+1] which risks undefined
// behavior.
uint16_t
bucket_index_lookup_[((kBitsPerSizeT + 1) * kNumBucketsPerOrder) + 1]{};
};
// static
ALWAYS_INLINE constexpr size_t BucketIndexLookup::GetIndex(size_t size) {
// This forces the bucket table to be constant-initialized and immediately
// materialized in the binary.
constexpr BucketIndexLookup lookup{};
const size_t order = kBitsPerSizeT - bits::CountLeadingZeroBitsSizeT(size);
// The order index is simply the next few bits after the most significant
// bit.
const size_t order_index =
(size >> kOrderIndexShift[order]) & (kNumBucketsPerOrder - 1);
// And if the remaining bits are non-zero we must bump the bucket up.
const size_t sub_order_index = size & kOrderSubIndexMask[order];
const uint16_t index =
lookup.bucket_index_lookup_[(order << kNumBucketsPerOrderBits) +
order_index + !!sub_order_index];
PA_DCHECK(index <= kNumBuckets); // Last one is the sentinel bucket.
return index;
}
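
For intuition, here is a standalone sketch of the size decomposition that this lookup relies on. The constants (8 buckets per order) and the helper below are assumptions for illustration and may not match PartitionAlloc's exact configuration; they only mirror the order / order-index / sub-order split used by GetIndex() above.

```cpp
// Standalone sketch of the size -> (order, order_index, sub_order) split that
// the lookup table is indexed by. Constants are assumed for illustration.
#include <cstddef>
#include <cstdio>

constexpr size_t kBucketsPerOrderBits = 3;  // assumed: 8 buckets per order
constexpr size_t kBucketsPerOrder = size_t{1} << kBucketsPerOrderBits;

constexpr size_t CountLeadingZeros(size_t x) {
  size_t n = 0;
  for (size_t bit = size_t{1} << (sizeof(size_t) * 8 - 1);
       bit != 0 && !(x & bit); bit >>= 1)
    ++n;
  return n;
}

int main() {
  // Only sizes large enough that the order exceeds kBucketsPerOrderBits are
  // handled here; the real code clamps the shift for tiny orders.
  for (size_t size : {1024u, 1025u, 1152u, 1153u}) {
    // "Order" is the (1-based) position of the most significant bit.
    const size_t order = sizeof(size_t) * 8 - CountLeadingZeros(size);
    const size_t shift = order - 1 - kBucketsPerOrderBits;
    // The next few bits below the MSB pick the bucket within the order...
    const size_t order_index = (size >> shift) & (kBucketsPerOrder - 1);
    // ...and any remaining low bits mean the size must round up to the next
    // bucket, which is what "+ !!sub_order_index" does in GetIndex().
    const size_t sub_order = size & ((size_t{1} << shift) - 1);
    std::printf("size=%zu order=%zu order_index=%zu round_up=%d\n", size,
                order, order_index, sub_order != 0);
  }
}
```

With these assumed constants, 1024 and 1152 map exactly onto buckets (round_up=0), while 1025 and 1153 fall between buckets and are bumped up to the next one.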
// Gets the SlotSpanMetadata object of the slot span that contains |ptr|. It's
// intended to be used to obtain the slot size. CAUTION! It works well for
// normal buckets, but for direct-mapped allocations it'll only work if |ptr| is
@@ -825,16 +907,7 @@ ALWAYS_INLINE size_t PartitionRoot<thread_safe>::GetSize(void* ptr) const {
template <bool thread_safe>
ALWAYS_INLINE uint16_t
PartitionRoot<thread_safe>::SizeToBucketIndex(size_t size) {
size_t order = kBitsPerSizeT - bits::CountLeadingZeroBitsSizeT(size);
// The order index is simply the next few bits after the most significant bit.
size_t order_index =
(size >> kOrderIndexShift[order]) & (kNumBucketsPerOrder - 1);
// And if the remaining bits are non-zero we must bump the bucket up.
size_t sub_order_index = size & kOrderSubIndexMask[order];
uint16_t index = bucket_index_lookup[(order << kNumBucketsPerOrderBits) +
order_index + !!sub_order_index];
PA_DCHECK(index <= kNumBuckets); // Last one is the sentinel bucket.
return index;
return internal::BucketIndexLookup::GetIndex(size);
}
template <bool thread_safe>
......
@@ -164,7 +164,7 @@ ALWAYS_INLINE uint64_t CountLeadingZeroBits64(uint64_t x) {
// instructions have defined behaviour for 0. We could drop to raw __asm__ to
// do better, but we'll avoid doing that unless we see proof that we need to.
template <typename T, unsigned bits = sizeof(T) * 8>
ALWAYS_INLINE
ALWAYS_INLINE constexpr
typename std::enable_if<std::is_unsigned<T>::value && sizeof(T) <= 8,
unsigned>::type
CountLeadingZeroBits(T value) {
@@ -177,7 +177,7 @@ ALWAYS_INLINE
}
template <typename T, unsigned bits = sizeof(T) * 8>
ALWAYS_INLINE
ALWAYS_INLINE constexpr
typename std::enable_if<std::is_unsigned<T>::value && sizeof(T) <= 8,
unsigned>::type
CountTrailingZeroBits(T value) {
@@ -187,31 +187,31 @@ ALWAYS_INLINE
: bits;
}
ALWAYS_INLINE uint32_t CountLeadingZeroBits32(uint32_t x) {
ALWAYS_INLINE constexpr uint32_t CountLeadingZeroBits32(uint32_t x) {
return CountLeadingZeroBits(x);
}
ALWAYS_INLINE uint64_t CountLeadingZeroBits64(uint64_t x) {
ALWAYS_INLINE constexpr uint64_t CountLeadingZeroBits64(uint64_t x) {
return CountLeadingZeroBits(x);
}
#endif
ALWAYS_INLINE size_t CountLeadingZeroBitsSizeT(size_t x) {
ALWAYS_INLINE constexpr size_t CountLeadingZeroBitsSizeT(size_t x) {
return CountLeadingZeroBits(x);
}
ALWAYS_INLINE size_t CountTrailingZeroBitsSizeT(size_t x) {
ALWAYS_INLINE constexpr size_t CountTrailingZeroBitsSizeT(size_t x) {
return CountTrailingZeroBits(x);
}
// Returns the integer i such that 2^i <= n < 2^(i+1)
inline int Log2Floor(uint32_t n) {
constexpr int Log2Floor(uint32_t n) {
return 31 - CountLeadingZeroBits(n);
}
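
As a small aside, here is a self-contained illustration (a local reimplementation, not the real base/bits.h API) of the Log2Floor contract and of what constexpr buys here, namely that the result can now be consumed at compile time, e.g. by compile-time table construction:

```cpp
// Local reimplementation for illustration only; not the base/bits.h code.
#include <cstdint>

constexpr int MyLog2Floor(uint32_t n) {
  int i = -1;
  while (n != 0) {
    n >>= 1;
    ++i;
  }
  return i;
}

// 2^3 <= 9 < 2^4, so the floor log2 of 9 is 3 -- checked at compile time.
static_assert(MyLog2Floor(9) == 3, "");
static_assert(MyLog2Floor(1) == 0, "");
static_assert(MyLog2Floor(0) == -1, "0 maps to -1, matching 31 - clz(0)");
```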
// Returns the integer i such that 2^(i-1) < n <= 2^i
inline int Log2Ceiling(uint32_t n) {
constexpr int Log2Ceiling(uint32_t n) {
// When n == 0, we want the function to return -1.
// When n == 0, (n - 1) will underflow to 0xFFFFFFFF, which is
// why the statement below starts with (n ? 32 : -1).
......