Commit 9f27c1bf authored by Sergei Glazunov's avatar Sergei Glazunov Committed by Commit Bot

Reland "[PartitionAlloc] Use reciprocal multiplication to speed up GetSlotOffset"

This is a reland of 4d9450df

Original change's description:
> [PartitionAlloc] Use reciprocal multiplication to speed up GetSlotOffset
>
> Currently, the "get offset of a pointer within its allocated slot"
> functionality, which is used extensively by several MiraclePtr
> implementations, performs an expensive modulo operation. This commit
> replaces it with a cheaper sequence of multiplications and bit shifts.
>
> Bug: 1092288
> Change-Id: I866689a79fff704b5f83fc07d8f28b3386ef644d
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2335443
> Commit-Queue: Sergei Glazunov <glazunov@google.com>
> Reviewed-by: Benoit L <lizeb@chromium.org>
> Reviewed-by: Bartek Nowierski <bartekn@chromium.org>
> Reviewed-by: Kentaro Hara <haraken@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#802194}

Bug: 1092288
Change-Id: Iec17efe8f9837f55c2f40dcf80033e6efd9580dc
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2380277
Commit-Queue: Sergei Glazunov <glazunov@google.com>
Reviewed-by: default avatarBenoit L <lizeb@chromium.org>
Reviewed-by: default avatarKentaro Hara <haraken@chromium.org>
Cr-Commit-Position: refs/heads/master@{#802700}
parent 9380a440
...@@ -62,9 +62,6 @@ static_assert(!(kPartitionPageSize % kSystemPageSize), ...@@ -62,9 +62,6 @@ static_assert(!(kPartitionPageSize % kSystemPageSize),
static_assert(sizeof(internal::PartitionPage<internal::ThreadSafe>) <= static_assert(sizeof(internal::PartitionPage<internal::ThreadSafe>) <=
kPageMetadataSize, kPageMetadataSize,
"PartitionPage should not be too big"); "PartitionPage should not be too big");
static_assert(sizeof(internal::PartitionBucket<internal::ThreadSafe>) <=
kPageMetadataSize,
"PartitionBucket should not be too big");
static_assert(kPageMetadataSize * kNumPartitionPagesPerSuperPage <= static_assert(kPageMetadataSize * kNumPartitionPagesPerSuperPage <=
kSystemPageSize, kSystemPageSize,
"page metadata fits in hole"); "page metadata fits in hole");
......
...@@ -818,18 +818,14 @@ ALWAYS_INLINE size_t PartitionAllocGetSlotOffset(void* ptr) { ...@@ -818,18 +818,14 @@ ALWAYS_INLINE size_t PartitionAllocGetSlotOffset(void* ptr) {
auto* page = auto* page =
internal::PartitionAllocGetPageForSize<internal::ThreadSafe>(ptr); internal::PartitionAllocGetPageForSize<internal::ThreadSafe>(ptr);
PA_DCHECK(PartitionRoot<internal::ThreadSafe>::FromPage(page)->allow_extras); PA_DCHECK(PartitionRoot<internal::ThreadSafe>::FromPage(page)->allow_extras);
size_t slot_size = page->bucket->slot_size;
// Get the offset from the beginning of the slot span. // Get the offset from the beginning of the slot span.
uintptr_t ptr_addr = reinterpret_cast<uintptr_t>(ptr); uintptr_t ptr_addr = reinterpret_cast<uintptr_t>(ptr);
uintptr_t slot_span_start = reinterpret_cast<uintptr_t>( uintptr_t slot_span_start = reinterpret_cast<uintptr_t>(
internal::PartitionPage<internal::ThreadSafe>::ToPointer(page)); internal::PartitionPage<internal::ThreadSafe>::ToPointer(page));
size_t offset_in_slot_span = ptr_addr - slot_span_start; size_t offset_in_slot_span = ptr_addr - slot_span_start;
// Knowing that slots are tightly packed in a slot span, calculate an offset
// within a slot using simple % operation. return page->bucket->GetSlotOffset(offset_in_slot_span);
// TODO(bartekn): Try to replace % with multiplication&shift magic.
size_t offset_in_slot = offset_in_slot_span % slot_size;
return offset_in_slot;
} }
#endif // BUILDFLAG(USE_PARTITION_ALLOC) #endif // BUILDFLAG(USE_PARTITION_ALLOC)
......
...@@ -897,6 +897,22 @@ TEST_F(PartitionAllocTest, AllocGetSizeAndOffset) { ...@@ -897,6 +897,22 @@ TEST_F(PartitionAllocTest, AllocGetSizeAndOffset) {
offset); offset);
} }
#endif #endif
// Allocate the maximum allowed bucketed size.
requested_size = kMaxBucketed - kExtraAllocSize;
predicted_size = allocator.root()->ActualSize(requested_size);
ptr = allocator.root()->Alloc(requested_size, type_name);
EXPECT_TRUE(ptr);
actual_size = allocator.root()->GetSize(ptr);
EXPECT_EQ(predicted_size, actual_size);
EXPECT_EQ(requested_size, actual_size);
#if defined(ARCH_CPU_64_BITS) && !defined(OS_NACL)
for (size_t offset = 0; offset < requested_size; offset += 4999) {
EXPECT_EQ(PartitionAllocGetSlotOffset(static_cast<char*>(ptr) + offset),
offset);
}
#endif
// Check that we can write at the end of the reported size too. // Check that we can write at the end of the reported size too.
char* char_ptr = reinterpret_cast<char*>(ptr); char* char_ptr = reinterpret_cast<char*>(ptr);
*(char_ptr + (actual_size - 1)) = 'A'; *(char_ptr + (actual_size - 1)) = 'A';
...@@ -2537,6 +2553,19 @@ TEST_F(PartitionAllocTest, TagBasic) { ...@@ -2537,6 +2553,19 @@ TEST_F(PartitionAllocTest, TagBasic) {
#endif #endif
// Test that the optimized `GetSlotOffset` implementation produces valid
// results.
TEST_F(PartitionAllocTest, OptimizedGetSlotOffset) {
auto* current_bucket = allocator.root()->buckets;
for (size_t i = 0; i < kNumBuckets; ++i, ++current_bucket) {
for (size_t offset = 0; offset <= kMaxBucketed; offset += 4999) {
EXPECT_EQ(offset % current_bucket->slot_size,
current_bucket->GetSlotOffset(offset));
}
}
}
} // namespace internal } // namespace internal
} // namespace base } // namespace base
......
...@@ -201,6 +201,7 @@ uint8_t PartitionBucket<thread_safe>::get_system_pages_per_slot_span() { ...@@ -201,6 +201,7 @@ uint8_t PartitionBucket<thread_safe>::get_system_pages_per_slot_span() {
template <bool thread_safe> template <bool thread_safe>
void PartitionBucket<thread_safe>::Init(uint32_t new_slot_size) { void PartitionBucket<thread_safe>::Init(uint32_t new_slot_size) {
slot_size = new_slot_size; slot_size = new_slot_size;
slot_size_reciprocal = kReciprocalMask / new_slot_size + 1;
active_pages_head = PartitionPage<thread_safe>::get_sentinel_page(); active_pages_head = PartitionPage<thread_safe>::get_sentinel_page();
empty_pages_head = nullptr; empty_pages_head = nullptr;
decommitted_pages_head = nullptr; decommitted_pages_head = nullptr;
......
...@@ -30,6 +30,24 @@ struct PartitionBucket { ...@@ -30,6 +30,24 @@ struct PartitionBucket {
uint32_t num_system_pages_per_slot_span : 8; uint32_t num_system_pages_per_slot_span : 8;
uint32_t num_full_pages : 24; uint32_t num_full_pages : 24;
// `slot_size_reciprocal` is used to improve the performance of
// `GetSlotOffset`. It is computed as `(1 / size) * (2 ** M)` where M is
// chosen to provide the desired accuracy. As a result, we can replace a slow
// integer division (or modulo) operation with a pair of multiplication and a
// bit shift, i.e. `value / size` becomes `(value * size_reciprocal) >> M`.
uint64_t slot_size_reciprocal;
// This is `M` from the formula above. For accurate results, both `value` and
// `size`, which are bound by `kMaxBucketed` for our purposes, must be less
// than `2 ** (M / 2)`. On the other hand, the result of the expression
// `3 * M / 2` must be less than 64, otherwise integer overflow can occur.
static constexpr uint64_t kReciprocalShift = 42;
static constexpr uint64_t kReciprocalMask = (1ull << kReciprocalShift) - 1;
static_assert(
kMaxBucketed < (1 << (kReciprocalShift / 2)),
"GetSlotOffset may produce an incorrect result when kMaxBucketed is too "
"large.");
// Public API. // Public API.
void Init(uint32_t new_slot_size); void Init(uint32_t new_slot_size);
...@@ -82,6 +100,27 @@ struct PartitionBucket { ...@@ -82,6 +100,27 @@ struct PartitionBucket {
// This is where the guts of the bucket maintenance is done! // This is where the guts of the bucket maintenance is done!
bool SetNewActivePage(); bool SetNewActivePage();
// Returns an offset within an allocation slot.
ALWAYS_INLINE size_t GetSlotOffset(size_t offset_in_slot_span) {
// Knowing that slots are tightly packed in a slot span, calculate an offset
// using an equivalent of a modulo operation.
// See the static assertion for `kReciprocalShift` above.
PA_DCHECK(offset_in_slot_span <= kMaxBucketed);
PA_DCHECK(slot_size <= kMaxBucketed);
// Calculate `decimal_part{offset_in_slot / size} * (2 ** M)` first.
uint64_t offset_in_slot =
(offset_in_slot_span * slot_size_reciprocal) & kReciprocalMask;
// (decimal_part * size) * (2 ** M) == offset_in_slot_span % size * (2 ** M)
// Divide by `2 ** M` using a bit shift.
offset_in_slot = (offset_in_slot * slot_size) >> kReciprocalShift;
PA_DCHECK(offset_in_slot_span % slot_size == offset_in_slot);
return static_cast<size_t>(offset_in_slot);
}
private: private:
static NOINLINE void OnFull(); static NOINLINE void OnFull();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment