Commit 7aade260 authored by Haiyang Pan, committed by Commit Bot

Revert "[PartitionAlloc] Use reciprocal multiplication to speed up GetSlotOffset"

This reverts commit 4d9450df.

Reason for revert: Likely caused the test PartitionAllocTest.DumpMemoryStats to fail consistently on the Android x86 builders since:
* https://ci.chromium.org/p/chromium/builders/ci/android-marshmallow-x86-rel/1084
* https://ci.chromium.org/p/chromium/builders/ci/android-pie-x86-rel/1992

Original change's description:
> [PartitionAlloc] Use reciprocal multiplication to speed up GetSlotOffset
> 
> Currently, the "get offset of a pointer within its allocated slot"
> functionality, which is used extensively by several MiraclePtr
> implementations, performs an expensive modulo operation. This commit
> replaces it with a cheaper sequence of multiplications and bit shifts.
> 
> Bug: 1092288
> Change-Id: I866689a79fff704b5f83fc07d8f28b3386ef644d
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2335443
> Commit-Queue: Sergei Glazunov <glazunov@google.com>
> Reviewed-by: Benoit L <lizeb@chromium.org>
> Reviewed-by: Bartek Nowierski <bartekn@chromium.org>
> Reviewed-by: Kentaro Hara <haraken@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#802194}

TBR=haraken@chromium.org,lizeb@chromium.org,bartekn@chromium.org,glazunov@google.com

Change-Id: Iaed8d78231201dd69a880e8a30028ddd64bead94
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: 1092288
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2380107
Reviewed-by: Haiyang Pan <hypan@google.com>
Commit-Queue: Haiyang Pan <hypan@google.com>
Cr-Commit-Position: refs/heads/master@{#802323}
parent 47ebb3cf
......@@ -62,6 +62,9 @@ static_assert(!(kPartitionPageSize % kSystemPageSize),
static_assert(sizeof(internal::PartitionPage<internal::ThreadSafe>) <=
kPageMetadataSize,
"PartitionPage should not be too big");
static_assert(sizeof(internal::PartitionBucket<internal::ThreadSafe>) <=
kPageMetadataSize,
"PartitionBucket should not be too big");
static_assert(kPageMetadataSize * kNumPartitionPagesPerSuperPage <=
kSystemPageSize,
"page metadata fits in hole");
......
......@@ -832,14 +832,18 @@ ALWAYS_INLINE size_t PartitionAllocGetSlotOffset(void* ptr) {
auto* page =
internal::PartitionAllocGetPageForSize<internal::ThreadSafe>(ptr);
PA_DCHECK(PartitionRoot<internal::ThreadSafe>::FromPage(page)->allow_extras);
size_t slot_size = page->bucket->slot_size;
// Get the offset from the beginning of the slot span.
uintptr_t ptr_addr = reinterpret_cast<uintptr_t>(ptr);
uintptr_t slot_span_start = reinterpret_cast<uintptr_t>(
internal::PartitionPage<internal::ThreadSafe>::ToPointer(page));
size_t offset_in_slot_span = ptr_addr - slot_span_start;
return page->bucket->GetSlotOffset(offset_in_slot_span);
// Knowing that slots are tightly packed in a slot span, calculate an offset
// within a slot using simple % operation.
// TODO(bartekn): Try to replace % with multiplication&shift magic.
size_t offset_in_slot = offset_in_slot_span % slot_size;
return offset_in_slot;
}
#endif // BUILDFLAG(USE_PARTITION_ALLOC)
......
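For reference, the code restored in the hunk above computes the in-slot offset with a plain modulo, relying on slots being tightly packed within a slot span. A minimal self-contained sketch of that idea (simplified names, not the actual PartitionAlloc API):

#include <cstddef>
#include <cstdint>

// Slots in a slot span are tightly packed and all have the same size, so the
// offset of a pointer within its slot is just the pointer's offset within the
// span reduced modulo the slot size.
size_t SlotOffsetByModulo(uintptr_t ptr_addr,
                          uintptr_t slot_span_start,
                          size_t slot_size) {
  size_t offset_in_slot_span = ptr_addr - slot_span_start;
  return offset_in_slot_span % slot_size;
}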
......@@ -120,12 +120,12 @@ static const size_t kMaxSystemPagesPerSlotSpan =
//
// A direct-mapped page's metadata page has the following layout:
//
// +---------------------------------------------------------+
// | SuperPageExtentEntry (32 B) |
// | PartitionPage (32 B) |
// | PartitionBucket (32 B on 32-, 40 B on 64-bit platforms) |
// | PartitionDirectMapExtent (32 B) |
// +---------------------------------------------------------+
// +---------------------------------+
// | SuperPageExtentEntry (32 B) |
// | PartitionPage (32 B) |
// | PartitionBucket (32 B) |
// | PartitionDirectMapExtent (32 B) |
// +---------------------------------+
static const size_t kSuperPageShift = 21; // 2 MiB
static const size_t kSuperPageSize = 1 << kSuperPageShift;
......
......@@ -904,26 +904,6 @@ TEST_F(PartitionAllocTest, AllocGetSizeAndOffset) {
offset);
}
#endif
// Allocate the maximum allowed bucketed size.
requested_size = kMaxBucketed - kExtraAllocSize;
predicted_size = allocator.root()->ActualSize(requested_size);
ptr = allocator.root()->Alloc(requested_size, type_name);
EXPECT_TRUE(ptr);
actual_size = allocator.root()->GetSize(ptr);
EXPECT_EQ(predicted_size, actual_size);
EXPECT_EQ(requested_size, actual_size);
#if defined(ARCH_CPU_64_BITS) && !defined(OS_NACL)
for (size_t offset = 0; offset < requested_size; offset += 4999) {
EXPECT_EQ(PartitionAllocGetSlotOffset(static_cast<char*>(ptr) + offset),
offset);
// TODO(bartekn): Remove when CheckedPtr2OrMTEImpl no longer calls
// mismatched variant.
EXPECT_EQ(PartitionAllocGetSlotOffset(static_cast<char*>(ptr) + offset),
offset);
}
#endif
// Check that we can write at the end of the reported size too.
char* char_ptr = reinterpret_cast<char*>(ptr);
*(char_ptr + (actual_size - 1)) = 'A';
......@@ -2564,19 +2544,6 @@ TEST_F(PartitionAllocTest, TagBasic) {
#endif
// Test that the optimized `GetSlotOffset` implementation produces valid
// results.
TEST_F(PartitionAllocTest, OptimizedGetSlotOffset) {
auto* current_bucket = allocator.root()->buckets;
for (size_t i = 0; i < kNumBuckets; ++i, ++current_bucket) {
for (size_t offset = 0; offset <= kMaxBucketed; offset += 4999) {
EXPECT_EQ(offset % current_bucket->slot_size,
current_bucket->GetSlotOffset(offset));
}
}
}
} // namespace internal
} // namespace base
......
......@@ -202,7 +202,6 @@ uint8_t PartitionBucket<thread_safe>::get_system_pages_per_slot_span() {
template <bool thread_safe>
void PartitionBucket<thread_safe>::Init(uint32_t new_slot_size) {
slot_size = new_slot_size;
slot_size_reciprocal = kReciprocalMask / new_slot_size + 1;
active_pages_head = PartitionPage<thread_safe>::get_sentinel_page();
empty_pages_head = nullptr;
decommitted_pages_head = nullptr;
......
......@@ -30,24 +30,6 @@ struct PartitionBucket {
uint32_t num_system_pages_per_slot_span : 8;
uint32_t num_full_pages : 24;
// `slot_size_reciprocal` is used to improve the performance of
// `GetSlotOffset`. It is computed as `(1 / size) * (2 ** M)` where M is
// chosen to provide the desired accuracy. As a result, we can replace a slow
// integer division (or modulo) operation with a pair of multiplication and a
// bit shift, i.e. `value / size` becomes `(value * size_reciprocal) >> M`.
uint64_t slot_size_reciprocal;
// This is `M` from the formula above. For accurate results, both `value` and
// `size`, which are bound by `kMaxBucketed` for our purposes, must be less
// than `2 ** (M / 2)`. On the other hand, the result of the expression
// `3 * M / 2` must be less than 64, otherwise integer overflow can occur.
static constexpr uint64_t kReciprocalShift = 42;
static constexpr uint64_t kReciprocalMask = (1ull << kReciprocalShift) - 1;
static_assert(
kMaxBucketed < (1 << (kReciprocalShift / 2)),
"GetSlotOffset may produce an incorrect result when kMaxBucketed is too "
"large.");
// Public API.
void Init(uint32_t new_slot_size);
......@@ -100,27 +82,6 @@ struct PartitionBucket {
// This is where the guts of the bucket maintenance is done!
bool SetNewActivePage();
// Returns an offset within an allocation slot.
ALWAYS_INLINE size_t GetSlotOffset(size_t offset_in_slot_span) {
// Knowing that slots are tightly packed in a slot span, calculate an offset
// using an equivalent of a modulo operation.
// See the static assertion for `kReciprocalShift` above.
PA_DCHECK(offset_in_slot_span <= kMaxBucketed);
PA_DCHECK(slot_size <= kMaxBucketed);
// Calculate `decimal_part{offset_in_slot / size} * (2 ** M)` first.
uint64_t offset_in_slot =
(offset_in_slot_span * slot_size_reciprocal) & kReciprocalMask;
// (decimal_part * size) * (2 ** M) == offset_in_slot_span % size * (2 ** M)
// Divide by `2 ** M` using a bit shift.
offset_in_slot = (offset_in_slot * slot_size) >> kReciprocalShift;
PA_DCHECK(offset_in_slot_span % slot_size == offset_in_slot);
return static_cast<size_t>(offset_in_slot);
}
private:
static NOINLINE void OnFull();
......
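To make the reverted optimization concrete, here is a standalone sketch of the reciprocal-multiplication trick that the removed `slot_size_reciprocal` / `GetSlotOffset` code implemented, with a spot-check against plain `%` in the spirit of the removed `OptimizedGetSlotOffset` test. Names such as `ComputeReciprocal` and `SlotOffsetByReciprocal` are illustrative and not part of PartitionAlloc:

#include <cassert>
#include <cstddef>
#include <cstdint>

// M from the removed comment: both inputs must stay below 2**(M/2), and
// 3*M/2 must be below 64 so the intermediate products fit in a uint64_t.
constexpr uint64_t kReciprocalShift = 42;
constexpr uint64_t kReciprocalMask = (1ull << kReciprocalShift) - 1;

// Precomputed once per bucket in the removed Init(): ceil(2**M / slot_size).
uint64_t ComputeReciprocal(uint64_t slot_size) {
  return kReciprocalMask / slot_size + 1;
}

// Equivalent of the removed GetSlotOffset(): keep the fractional part of
// offset / slot_size scaled by 2**M, multiply it back by slot_size, then
// shift the 2**M scale factor out. The result equals offset % slot_size.
size_t SlotOffsetByReciprocal(size_t offset_in_slot_span,
                              uint64_t slot_size,
                              uint64_t slot_size_reciprocal) {
  uint64_t frac =
      (offset_in_slot_span * slot_size_reciprocal) & kReciprocalMask;
  return static_cast<size_t>((frac * slot_size) >> kReciprocalShift);
}

int main() {
  // Spot-check a few bucket-like sizes and offsets against plain modulo,
  // mirroring the 4999-byte stride used by the removed test.
  const uint64_t sizes[] = {16, 48, 3072, 49152};
  for (uint64_t size : sizes) {
    const uint64_t reciprocal = ComputeReciprocal(size);
    for (size_t offset = 0; offset < (1u << 18); offset += 4999) {
      assert(SlotOffsetByReciprocal(offset, size, reciprocal) ==
             offset % size);
    }
  }
  return 0;
}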
......@@ -30,12 +30,7 @@ PartitionDirectMapExtent<thread_safe>::FromPage(
PartitionPage<thread_safe>* page) {
PA_DCHECK(page->bucket->is_direct_mapped());
return reinterpret_cast<PartitionDirectMapExtent<thread_safe>*>(
reinterpret_cast<char*>(page) +
kPageMetadataSize + // sizeof(PartitionSuperPageExtentEntry)
// TODO(crbug.com/787153): Refactor the code to have
// a more robust offset calculation.
sizeof(PartitionPage<thread_safe>) +
sizeof(PartitionBucket<thread_safe>));
reinterpret_cast<char*>(page) + 3 * kPageMetadataSize);
}
} // namespace internal
......
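The last hunk restores the pre-optimization offset of `PartitionDirectMapExtent` within the direct-map metadata page: `3 * kPageMetadataSize` instead of a sum of `sizeof`s. A toy, illustrative-only sketch (simplified stand-in structs, not the real PartitionAlloc metadata types) of why the two expressions coincide when, as the restored layout comment states, each metadata entry occupies exactly one 32-byte slot:

#include <cstddef>
#include <cstdint>

// Toy stand-ins: each entry is padded to exactly one kPageMetadataSize-sized
// slot, matching the "32 B" figures in the restored layout comment.
constexpr size_t kToyPageMetadataSize = 32;

struct alignas(kToyPageMetadataSize) ToySuperPageExtentEntry { uint64_t v[3]; };
struct alignas(kToyPageMetadataSize) ToyPartitionPage { uint64_t v[4]; };
struct alignas(kToyPageMetadataSize) ToyPartitionBucket { uint64_t v[4]; };

static_assert(sizeof(ToySuperPageExtentEntry) == kToyPageMetadataSize &&
                  sizeof(ToyPartitionPage) == kToyPageMetadataSize &&
                  sizeof(ToyPartitionBucket) == kToyPageMetadataSize,
              "each toy entry is padded to one 32-byte slot");

// The reverted change expressed the offset as
//   kPageMetadataSize + sizeof(PartitionPage) + sizeof(PartitionBucket);
// with one entry per slot this collapses to 3 * kPageMetadataSize, which is
// what the restored code uses.
static_assert(kToyPageMetadataSize + sizeof(ToyPartitionPage) +
                      sizeof(ToyPartitionBucket) ==
                  3 * kToyPageMetadataSize,
              "the two offset expressions agree when each entry fills one slot");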