Commit 4ca50e35 authored by Bartek Nowierski's avatar Bartek Nowierski Committed by Commit Bot

Initial CheckedPtr2 implementation

This code is currently unused, but is needed to evaluate performance
impact of the CheckedPtr initiative.
https://docs.google.com/document/d/1pnnOAIz_DMWDI4oIOFoMAqLnf_MZ2GsrJNb_dbQ3ZBg

The current implementation is an expensive no-op. It offers no run-time
verification of pointer correctness, but it performs work as if it did.
It is expected to have similar performance characteristics to the
designed CheckedPtr2. CHECKED_PTR2_PROTECTION_ENABLED and "TEST"
comments show how the actual code would look had all the needed
support already been implemented.

Bug: 1073933
Change-Id: I4fc6a1a3b6701dcae9bb38ff3fa01fd6276812fc
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2162493
Commit-Queue: Bartek Nowierski <bartekn@chromium.org>
Reviewed-by: default avatarBenoit L <lizeb@chromium.org>
Reviewed-by: default avatarKentaro Hara <haraken@chromium.org>
Cr-Commit-Position: refs/heads/master@{#772675}
parent 07dfc1a6
...@@ -11,6 +11,22 @@ ...@@ -11,6 +11,22 @@
#include <utility> #include <utility>
#include "base/compiler_specific.h" #include "base/compiler_specific.h"
#include "base/logging.h"
#include "build/build_config.h"
// TEST: We can't use protection in the real code (yet) because it may lead to
// crashes in the absence of PartitionAlloc support. Setting it to 0 will
// disable the protection, while preserving all calculations.
#define CHECKED_PTR2_PROTECTION_ENABLED 0

// Set it to 1 to make WrapRawPtr() return the address unchanged, skipping the
// generation lookup entirely (only valid while protection is disabled; see the
// static_assert in WrapRawPtr()).
#define CHECKED_PTR2_USE_NO_OP_WRAPPER 0

// Set it to 1 to avoid branches when checking if per-pointer protection is
// enabled.
#define CHECKED_PTR2_AVOID_BRANCH_WHEN_CHECKING_ENABLED 0
// Set it to 1 to avoid branches when dereferencing the pointer.
// Must be 1 if the above is 1.
#define CHECKED_PTR2_AVOID_BRANCH_WHEN_DEREFERENCING 0
namespace base { namespace base {
...@@ -67,6 +83,275 @@ struct CheckedPtrNoOpImpl { ...@@ -67,6 +83,275 @@ struct CheckedPtrNoOpImpl {
static ALWAYS_INLINE void IncrementSwapCountForTest() {} static ALWAYS_INLINE void IncrementSwapCountForTest() {}
}; };
#if defined(ARCH_CPU_64_BITS)

// Layout of a wrapped pointer on 64-bit platforms: the low |kValidAddressBits|
// hold the real address, the remaining top bits hold the allocation's
// generation tag (and, in some variants, the top bit is a protection-enabled
// flag; see kTopBit).
constexpr int kValidAddressBits = 48;
constexpr uintptr_t kAddressMask = (1ull << kValidAddressBits) - 1;
constexpr int kGenerationBits = sizeof(uintptr_t) * 8 - kValidAddressBits;
constexpr uintptr_t kGenerationMask = ~kAddressMask;
constexpr int kTopBitShift = 63;
constexpr uintptr_t kTopBit = 1ull << kTopBitShift;
static_assert(kTopBit << 1 == 0, "kTopBit should really be the top bit");
static_assert((kTopBit & kGenerationMask) > 0,
              "kTopBit bit must be inside the generation region");

// TEST: Use volatile so that the read isn't optimized out.
static volatile bool g_enabled = true;
// Implementation of the CheckedPtr algorithm: tags each wrapped pointer with
// the pointee allocation's 16-bit generation, stored in the otherwise-unused
// top bits of the address, and re-checks that generation on unwrap.
// NOTE(review): in this TEST configuration (all CHECKED_PTR2_* macros 0) the
// checks are deliberately no-ops that only mimic the cost of the real thing.
struct CheckedPtr2Impl {
  static_assert(sizeof(uintptr_t) == 8,
                "only 64-bit architectures are supported");

  // Wraps a pointer, and returns its uintptr_t representation.
  static ALWAYS_INLINE uintptr_t WrapRawPtr(const volatile void* cv_ptr) {
    void* ptr = const_cast<void*>(cv_ptr);
    uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
#if CHECKED_PTR2_USE_NO_OP_WRAPPER
    static_assert(!CHECKED_PTR2_PROTECTION_ENABLED, "");
#else
    // Make sure that the address bits that will be used for generation are 0.
    // Otherwise the logic may fail.
    DCHECK_EQ(ExtractGeneration(addr), 0ull);

    // TEST: |g_enabled| should be replaced with a check if the allocation is on
    // PartitionAlloc. There could be also a Finch check added.
    if (ptr == nullptr || !g_enabled) {
      return addr;
    }

    // TEST: It should be |size = base::PartitionAllocGetSize(ptr)|, however
    // |PartitionAllocGetSize()| will likely crash if used on a non-PA pointer.
    // For now, replacing it with something that always passes: |size| is the
    // lowest set bit of |addr|, so |addr % size| is always 0.
    //
    // TEST: There shouldn't be |volatile|; that's to prevent optimization of %.
    volatile size_t size = (addr & (addr - 1)) ^ addr;
    if (addr % size != 0) {
      DCHECK(false);
      return addr;
    }

    // Read the generation from 16 bits before the allocation. Then place it in
    // the top bits of the address.
    //
    // TODO(bartekn): Consider if casting to |volatile*| is needed. I
    // believe it's needed when dereferencing, not sure about here.
    static_assert(sizeof(uint16_t) * 8 == kGenerationBits, "");
#if CHECKED_PTR2_PROTECTION_ENABLED
    uintptr_t generation = *(static_cast<volatile uint16_t*>(ptr) - 1);
#else
    // TEST: Reading from offset -1 may crash without PA support.
    // Just read from offset 0 to attain the same perf characteristics as the
    // expected production solution.
    // This generation will be ignored anyway either when unwrapping or below
    // (depending on the algorithm variant), on the
    // !CHECKED_PTR2_PROTECTION_ENABLED path.
    uintptr_t generation = *(static_cast<volatile uint16_t*>(ptr));
#endif  // #else CHECKED_PTR2_PROTECTION_ENABLED
    generation <<= kValidAddressBits;
    addr |= generation;

#if CHECKED_PTR2_AVOID_BRANCH_WHEN_CHECKING_ENABLED
    // Always set top bit to 1, to indicate that the protection is enabled.
    addr |= kTopBit;
#if !CHECKED_PTR2_PROTECTION_ENABLED
    // TEST: Clear the generation, or else it could crash without PA support.
    // If the top bit was set, the unwrapper would read from before the
    // address, but with it cleared, it'll read from the address itself.
    addr &= kAddressMask;
#endif  // #if !CHECKED_PTR2_PROTECTION_ENABLED
#endif  // #if CHECKED_PTR2_AVOID_BRANCH_WHEN_CHECKING_ENABLED
#endif  // #if CHECKED_PTR2_USE_NO_OP_WRAPPER
    return addr;
  }

  // Returns equivalent of |WrapRawPtr(nullptr)|. Separated out to make it a
  // constexpr.
  static constexpr ALWAYS_INLINE uintptr_t GetWrappedNullPtr() {
    return kWrappedNullPtr;
  }

  // Verifies the generation in |wrapped_ptr| against the one stored with the
  // allocation, and returns the bare address. Exact behavior depends on the
  // CHECKED_PTR2_* configuration macros.
  static ALWAYS_INLINE uintptr_t
  SafelyUnwrapPtrInternal(uintptr_t wrapped_ptr) {
#if CHECKED_PTR2_AVOID_BRANCH_WHEN_CHECKING_ENABLED
    // Top bit tells if the protection is enabled. Use it to decide whether to
    // read the word before the allocation, which exists only if the protection
    // is enabled. Otherwise it may crash, in which case read the data from the
    // beginning of the allocation instead and ignore it later. All this magic
    // is to avoid a branch, for performance reasons.
    //
    // A couple examples, assuming 64-bit system (continued below):
    //   Ex.1: wrapped_ptr=0x8442000012345678
    //           => enabled=0x8000000000000000
    //           => offset=1
    //   Ex.2: wrapped_ptr=0x0000000012345678
    //           => enabled=0x0000000000000000
    //           => offset=0
    uintptr_t enabled = wrapped_ptr & kTopBit;
    // We can't have protection disabled and generation set at the same time.
    DCHECK(!(enabled == 0 && (ExtractGeneration(wrapped_ptr)) != 0));
    uintptr_t offset = enabled >> kTopBitShift;  // 0 or 1
    // Use offset to decide if the generation should be read at the beginning or
    // before the allocation.
    // TODO(bartekn): Do something about 1-byte allocations. Reading 2-byte
    // generation at the allocation could crash. This case is executed
    // specifically for non-PartitionAlloc pointers, so we can't make
    // assumptions about alignment.
    //
    // Cast to volatile to ensure memory is read. E.g. in a tight loop, the
    // compiler could cache the value in a register and thus could miss that
    // another thread freed memory and cleared generation.
    //
    // Examples (continued):
    //   Ex.1: generation_ptr=0x0000000012345676
    //     a) if pointee wasn't freed, read e.g. generation=0x0442 (could be
    //        also 0x8442, the top bit is overwritten later)
    //     b) if pointee was freed, read e.g. generation=0x1234 (could be
    //        anything)
    //   Ex.2: generation_ptr=0x0000000012345678, read e.g. 0x2345 (doesn't
    //         matter what we read, as long as this read doesn't crash)
    volatile uint16_t* generation_ptr =
        reinterpret_cast<volatile uint16_t*>(ExtractAddress(wrapped_ptr)) -
        offset;
    uintptr_t generation = *generation_ptr;
    // Shift generation into the right place and add back the enabled bit.
    //
    // Examples (continued):
    //   Ex.1:
    //     a) generation=0x8442000000000000
    //     b) generation=0x9234000000000000
    //   Ex.2: generation=0x2345000000000000
    generation <<= kValidAddressBits;
    generation |= enabled;
    // If the protection isn't enabled, clear top bits. Casting to a signed
    // type makes >> sign extend the last bit.
    //
    // Examples (continued):
    //   Ex.1: mask=0xffff000000000000
    //     a) generation=0x8442000000000000
    //     b) generation=0x9234000000000000
    //   Ex.2: mask=0x0000000000000000 => generation=0x0000000000000000
    uintptr_t mask = static_cast<intptr_t>(enabled) >> (kGenerationBits - 1);
    generation &= mask;
    // Use hardware to detect generation mismatch. CPU will crash if top bits
    // aren't all 0 (technically it won't if all bits are 1, but that's a kernel
    // mode address, which isn't allowed either... also, top bit will always be
    // zeroed out).
    //
    // Examples (continued):
    //   Ex.1:
    //     a) returning 0x0000000012345678
    //     b) returning 0x1676000012345678 (this will generate a desired crash)
    //   Ex.2: returning 0x0000000012345678
    static_assert(CHECKED_PTR2_AVOID_BRANCH_WHEN_DEREFERENCING, "");
    return generation ^ wrapped_ptr;
#else  // #if CHECKED_PTR2_AVOID_BRANCH_WHEN_CHECKING_ENABLED
    uintptr_t ptr_generation = wrapped_ptr >> kValidAddressBits;
    if (ptr_generation > 0) {
      // Read generation from before the allocation.
      //
      // Cast to volatile to ensure memory is read. E.g. in a tight loop, the
      // compiler could cache the value in a register and thus could miss that
      // another thread freed memory and cleared generation.
#if CHECKED_PTR2_PROTECTION_ENABLED
      uintptr_t read_generation =
          *(reinterpret_cast<volatile uint16_t*>(ExtractAddress(wrapped_ptr)) -
            1);
#else
      // TEST: Reading from before the pointer may crash. See more above...
      uintptr_t read_generation =
          *(reinterpret_cast<volatile uint16_t*>(ExtractAddress(wrapped_ptr)));
#endif
#if CHECKED_PTR2_AVOID_BRANCH_WHEN_DEREFERENCING
      // Use hardware to detect generation mismatch. CPU will crash if top bits
      // aren't all 0 (technically it won't if all bits are 1, but that's a
      // kernel mode address, which isn't allowed either).
      read_generation <<= kValidAddressBits;
      return read_generation ^ wrapped_ptr;
#else
#if CHECKED_PTR2_PROTECTION_ENABLED
      if (UNLIKELY(ptr_generation != read_generation))
        IMMEDIATE_CRASH();
#else
      // TEST: Use volatile to prevent optimizing out the calculations leading
      // to this point.
      volatile bool x = false;
      if (ptr_generation != read_generation)
        x = true;
#endif  // #else CHECKED_PTR2_PROTECTION_ENABLED
      return wrapped_ptr & kAddressMask;
#endif  // #else CHECKED_PTR2_AVOID_BRANCH_WHEN_DEREFERENCING
    }
    return wrapped_ptr;
#endif  // #else CHECKED_PTR2_AVOID_BRANCH_WHEN_CHECKING_ENABLED
  }

  // Unwraps the pointer's uintptr_t representation, while asserting that memory
  // hasn't been freed. The function is allowed to crash on nullptr.
  static ALWAYS_INLINE void* SafelyUnwrapPtrForDereference(
      uintptr_t wrapped_ptr) {
#if CHECKED_PTR2_PROTECTION_ENABLED
    return reinterpret_cast<void*>(SafelyUnwrapPtrInternal(wrapped_ptr));
#else
    // TEST: Use volatile to prevent optimizing out the calculations leading to
    // this point.
    // |SafelyUnwrapPtrInternal| was separated out solely for this purpose.
    volatile uintptr_t addr = SafelyUnwrapPtrInternal(wrapped_ptr);
    return reinterpret_cast<void*>(addr);
#endif
  }

  // Unwraps the pointer's uintptr_t representation, while asserting that memory
  // hasn't been freed. The function must handle nullptr gracefully.
  static ALWAYS_INLINE void* SafelyUnwrapPtrForExtraction(
      uintptr_t wrapped_ptr) {
#if CHECKED_PTR2_AVOID_BRANCH_WHEN_CHECKING_ENABLED
    // In this implementation SafelyUnwrapPtrForDereference doesn't tolerate
    // nullptr, because it reads unconditionally to avoid branches. Handle the
    // nullptr case here.
    if (wrapped_ptr == kWrappedNullPtr)
      return nullptr;
    return reinterpret_cast<void*>(SafelyUnwrapPtrForDereference(wrapped_ptr));
#else
    // In this implementation SafelyUnwrapPtrForDereference handles nullptr case
    // well.
    return reinterpret_cast<void*>(SafelyUnwrapPtrForDereference(wrapped_ptr));
#endif
  }

  // Unwraps the pointer's uintptr_t representation, without making an assertion
  // on whether memory was freed or not.
  static ALWAYS_INLINE void* UnsafelyUnwrapPtrForComparison(
      uintptr_t wrapped_ptr) {
    return reinterpret_cast<void*>(ExtractAddress(wrapped_ptr));
  }

  // Advance the wrapped pointer by |delta| bytes.
  static ALWAYS_INLINE uintptr_t Advance(uintptr_t wrapped_ptr, size_t delta) {
    // Mask out the generation to disable the protection. It's not supported for
    // pointers inside an allocation.
    return ExtractAddress(wrapped_ptr) + delta;
  }

  // This is for accounting only, used by unit tests.
  static ALWAYS_INLINE void IncrementSwapCountForTest() {}

 private:
  // Returns the bare address, i.e. the low |kValidAddressBits| bits.
  static ALWAYS_INLINE uintptr_t ExtractAddress(uintptr_t wrapped_ptr) {
    return wrapped_ptr & kAddressMask;
  }

  // Returns the generation bits, i.e. everything above |kValidAddressBits|.
  static ALWAYS_INLINE uintptr_t ExtractGeneration(uintptr_t wrapped_ptr) {
    return wrapped_ptr & kGenerationMask;
  }

  // This relies on nullptr and 0 being equal in the eyes of reinterpret_cast,
  // which apparently isn't true in some rare environments.
  static constexpr uintptr_t kWrappedNullPtr = 0;
};
#endif // #if defined(ARCH_CPU_64_BITS)
template <typename T> template <typename T>
struct DereferencedPointerType { struct DereferencedPointerType {
using Type = decltype(*std::declval<T*>()); using Type = decltype(*std::declval<T*>());
...@@ -92,7 +377,12 @@ struct DereferencedPointerType<void> {}; ...@@ -92,7 +377,12 @@ struct DereferencedPointerType<void> {};
// 2. Keep this class as small as possible, while still satisfying goal #1 (i.e. // 2. Keep this class as small as possible, while still satisfying goal #1 (i.e.
// we aren't striving to maximize compatibility with raw pointers, merely // we aren't striving to maximize compatibility with raw pointers, merely
// adding support for cases encountered so far). // adding support for cases encountered so far).
template <typename T, typename Impl = internal::CheckedPtrNoOpImpl> template <typename T,
#if defined(ARCH_CPU_64_BITS)
typename Impl = internal::CheckedPtr2Impl>
#else
typename Impl = internal::CheckedPtrNoOpImpl>
#endif
class CheckedPtr { class CheckedPtr {
public: public:
// CheckedPtr can be trivially default constructed (leaving |wrapped_ptr_| // CheckedPtr can be trivially default constructed (leaving |wrapped_ptr_|
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <type_traits> #include <type_traits>
#include <utility> #include <utility>
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
using testing::Test; using testing::Test;
...@@ -535,4 +536,94 @@ TEST_F(CheckedPtrTest, AssignmentFromNullptr) { ...@@ -535,4 +536,94 @@ TEST_F(CheckedPtrTest, AssignmentFromNullptr) {
EXPECT_EQ(g_get_for_dereference_cnt, 0); EXPECT_EQ(g_get_for_dereference_cnt, 0);
} }
#if defined(ARCH_CPU_64_BITS)
// Wrapping nullptr must yield the canonical zero representation, both via the
// constexpr shortcut and via the full wrapping path.
TEST(CheckedPtr2Impl, WrapNull) {
  using Impl = base::internal::CheckedPtr2Impl;
  ASSERT_EQ(Impl::GetWrappedNullPtr(), 0u);
  ASSERT_EQ(Impl::WrapRawPtr(nullptr), 0u);
}
// Unwrapping the zero representation for extraction must hand back nullptr.
TEST(CheckedPtr2Impl, SafelyUnwrapNull) {
  using Impl = base::internal::CheckedPtr2Impl;
  ASSERT_EQ(Impl::SafelyUnwrapPtrForExtraction(0), nullptr);
}
// Round-trips a pointer through WrapRawPtr/SafelyUnwrapPtrInternal for all
// macro configurations. bytes[6..7] (the word preceding |ptr|) play the role
// of the allocation's generation tag.
TEST(CheckedPtr2Impl, WrapAndSafelyUnwrap) {
  char bytes[] = {0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0xBA, 0x42, 0x78, 0x89};
#if !CHECKED_PTR2_PROTECTION_ENABLED
  // If protection is disabled, wrap & unwrap will read at the pointer, not
  // before it, so mirror the generation bytes to offset 0 of the allocation.
  bytes[8] = bytes[6];
  bytes[9] = bytes[7];
#endif
  void* ptr = bytes + sizeof(uintptr_t);
  uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
  // Expected adjustments to the wrapped value, depending on the variant:
  // |set_top_bit| is OR-ed in when the branch-free check marks protection
  // enabled; |mask| strips bits the TEST configuration clears.
  uintptr_t set_top_bit = 0x0000000000000000;
  uintptr_t mask = 0xFFFFFFFFFFFFFFFF;
#if CHECKED_PTR2_AVOID_BRANCH_WHEN_CHECKING_ENABLED
  set_top_bit = 0x8000000000000000;
#if !CHECKED_PTR2_PROTECTION_ENABLED
  mask = 0x0000FFFFFFFFFFFF;
#endif
#endif
  uintptr_t wrapped = base::internal::CheckedPtr2Impl::WrapRawPtr(ptr);
  // First 2 bytes in the preceding word will be used as generation (in reverse
  // order due to little-endianness).
#if CHECKED_PTR2_USE_NO_OP_WRAPPER
  ASSERT_EQ(wrapped, addr);
  std::ignore = set_top_bit;
  std::ignore = mask;
#else
  ASSERT_EQ(wrapped, (addr | 0x42BA000000000000 | set_top_bit) & mask);
#endif
  ASSERT_EQ(base::internal::CheckedPtr2Impl::SafelyUnwrapPtrInternal(wrapped),
            addr);
  // Flip the top bit of the generation and round-trip again.
  bytes[7] |= 0x80;
#if !CHECKED_PTR2_PROTECTION_ENABLED
  bytes[9] = bytes[7];
#endif
  wrapped = base::internal::CheckedPtr2Impl::WrapRawPtr(ptr);
#if CHECKED_PTR2_USE_NO_OP_WRAPPER
  ASSERT_EQ(wrapped, addr);
#else
  ASSERT_EQ(wrapped, (addr | 0xC2BA000000000000 | set_top_bit) & mask);
#endif
  ASSERT_EQ(base::internal::CheckedPtr2Impl::SafelyUnwrapPtrInternal(wrapped),
            addr);
#if CHECKED_PTR2_AVOID_BRANCH_WHEN_DEREFERENCING
  // Clear the stored generation so it no longer matches |wrapped|; the
  // branch-free unwrap XORs the mismatch into the result instead of crashing
  // in this TEST configuration.
  bytes[6] = 0;
  bytes[7] = 0;
#if !CHECKED_PTR2_PROTECTION_ENABLED
  bytes[8] = bytes[6];
  bytes[9] = bytes[7];
#endif
  mask = 0xFFFFFFFFFFFFFFFF;
#if CHECKED_PTR2_AVOID_BRANCH_WHEN_CHECKING_ENABLED
  mask = 0x7FFFFFFFFFFFFFFF;
#if !CHECKED_PTR2_PROTECTION_ENABLED
  mask = 0x0000FFFFFFFFFFFF;
#endif
#endif
  // Mask out the top bit, because in some cases (not all), it may differ.
  ASSERT_EQ(
      base::internal::CheckedPtr2Impl::SafelyUnwrapPtrInternal(wrapped) & mask,
      wrapped & mask);
#endif
}
// A wrapped value with no generation bits set (protection disabled) must
// unwrap to itself untouched.
TEST(CheckedPtr2Impl, SafelyUnwrapDisabled) {
  using Impl = base::internal::CheckedPtr2Impl;
  char bytes[] = {0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0xBA, 0x42, 0x78, 0x89};
  uintptr_t addr = reinterpret_cast<uintptr_t>(bytes + sizeof(uintptr_t));
  ASSERT_EQ(Impl::SafelyUnwrapPtrInternal(addr), addr);
}
#endif // #if defined(ARCH_CPU_64_BITS)
} // namespace } // namespace
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment