Commit 6b20e465, authored by Siddhartha, committed by Commit Bot

Add a thread local cache for CFI table unwinder

Adds a simple cache that uses prime-modulo hashing. The cache achieves a
95% hit rate and gives a 30% performance improvement when running the heap
profiler, compared to running without the cache.

BUG=819888

Change-Id: I4c3dd6cf34ee1db21469e56886eaa3f62dd1881d
Reviewed-on: https://chromium-review.googlesource.com/985234
Reviewed-by: Dmitry Skiba <dskiba@chromium.org>
Commit-Queue: Siddhartha S <ssid@chromium.org>
Cr-Commit-Position: refs/heads/master@{#548570}
parent 0aaf77db
......@@ -8,7 +8,6 @@
#include <sys/types.h>
#include "base/android/apk_assets.h"
#include "base/debug/stack_trace.h"
#if !defined(ARCH_CPU_ARMEL)
#error This file should not be built for this architecture.
......@@ -120,7 +119,9 @@ CFIBacktraceAndroid* CFIBacktraceAndroid::GetInstance() {
return instance;
}
CFIBacktraceAndroid::CFIBacktraceAndroid() {
// Constructs the unwinder. The thread-local slot is created with a callback
// that deletes the CFICache it is handed (presumably run by
// ThreadLocalStorage when a thread that stored a cache exits -- confirm
// against base/threading/thread_local_storage.h). Initialize() is then called
// to finish setting up the instance.
CFIBacktraceAndroid::CFIBacktraceAndroid()
    : thread_local_cfi_cache_([](void* slot_value) {
        auto* owned_cache = static_cast<CFICache*>(slot_value);
        delete owned_cache;
      }) {
  Initialize();
}
......@@ -171,8 +172,7 @@ void CFIBacktraceAndroid::ParseCFITables() {
unw_data_start_addr_ = unw_index_indices_col_ + unw_index_row_count_;
}
size_t CFIBacktraceAndroid::Unwind(const void** out_trace,
size_t max_depth) const {
size_t CFIBacktraceAndroid::Unwind(const void** out_trace, size_t max_depth) {
// This function walks the stack using the call frame information to find the
// return addresses of all the functions that belong to current binary in call
// stack. For each function the CFI table defines the offset of the previous
......@@ -209,9 +209,13 @@ size_t CFIBacktraceAndroid::Unwind(const void** out_trace,
return depth;
}
bool CFIBacktraceAndroid::FindCFIRowForPC(
uintptr_t func_addr,
CFIBacktraceAndroid::CFIRow* cfi) const {
bool CFIBacktraceAndroid::FindCFIRowForPC(uintptr_t func_addr,
CFIBacktraceAndroid::CFIRow* cfi) {
auto* cache = GetThreadLocalCFICache();
*cfi = {0};
if (cache->Find(func_addr, cfi))
return true;
// Consider each column of UNW_INDEX table as arrays of uintptr_t (function
// addresses) and uint16_t (indices). Define start and end iterator on the
// first column array (addresses) and use std::lower_bound() to binary search
......@@ -220,7 +224,6 @@ bool CFIBacktraceAndroid::FindCFIRowForPC(
unw_index_function_col_ + unw_index_row_count_;
const uintptr_t* found =
std::lower_bound(unw_index_function_col_, unw_index_fn_end, func_addr);
*cfi = {0};
// If found is start, then the given function is not in the table. If the
// given pc is start of a function then we cannot unwind.
......@@ -280,8 +283,32 @@ bool CFIBacktraceAndroid::FindCFIRowForPC(
*cfi = {cfi_row.cfa_offset(), ra_offset};
DCHECK(cfi->cfa_offset);
DCHECK(cfi->ra_offset);
// safe to update since the cache is thread local.
cache->Add(func_addr, *cfi);
return true;
}
// Returns the CFICache stored in the thread-local slot, creating one and
// storing it in the slot the first time the slot is found empty. The returned
// pointer is never null.
CFIBacktraceAndroid::CFICache* CFIBacktraceAndroid::GetThreadLocalCFICache() {
  void* const slot_value = thread_local_cfi_cache_.Get();
  if (slot_value)
    return static_cast<CFICache*>(slot_value);

  // Nothing stored yet: allocate a cache and hand it to the slot. The slot was
  // constructed with a callback that deletes the stored CFICache, so the raw
  // allocation is not released here.
  CFICache* const fresh_cache = new CFICache();
  thread_local_cfi_cache_.Set(fresh_cache);
  return fresh_cache;
}
void CFIBacktraceAndroid::CFICache::Add(uintptr_t address, CFIRow cfi) {
cache_[address % kLimit] = {address, cfi};
}
bool CFIBacktraceAndroid::CFICache::Find(uintptr_t address, CFIRow* cfi) {
if (cache_[address % kLimit].address == address) {
*cfi = cache_[address % kLimit].cfi;
return true;
}
return false;
}
} // namespace trace_event
} // namespace base
......@@ -14,6 +14,7 @@
#include "base/debug/debugging_buildflags.h"
#include "base/files/memory_mapped_file.h"
#include "base/gtest_prod_util.h"
#include "base/threading/thread_local_storage.h"
namespace base {
namespace trace_event {
......@@ -40,16 +41,17 @@ class BASE_EXPORT CFIBacktraceAndroid {
// Returns the program counters by unwinding stack in the current thread in
// order of latest call frame first. Unwinding works only if
// can_unwind_stack_frames() returns true. This function does not allocate
// memory from heap. For each stack frame, this method searches through the
// can_unwind_stack_frames() returns true. This function allocates memory from
// heap for caches. For each stack frame, this method searches through the
// unwind table mapped in memory to find the unwind information for function
// and walks the stack to find all the return address. This only works until
// the last function call from the chrome.so. We do not have unwind
// information to unwind beyond any frame outside of chrome.so. Calls to
// Unwind() are thread safe and lock free, once Initialize() returns success.
size_t Unwind(const void** out_trace, size_t max_depth) const;
size_t Unwind(const void** out_trace, size_t max_depth);
private:
FRIEND_TEST_ALL_PREFIXES(CFIBacktraceAndroidTest, TestCFICache);
FRIEND_TEST_ALL_PREFIXES(CFIBacktraceAndroidTest, TestFindCFIRow);
FRIEND_TEST_ALL_PREFIXES(CFIBacktraceAndroidTest, TestUnwinding);
......@@ -62,12 +64,43 @@ class BASE_EXPORT CFIBacktraceAndroid {
// The offset of the call frame address of previous function from the
// current stack pointer. Rule for unwinding SP: SP_prev = SP_cur +
// cfa_offset.
size_t cfa_offset = 0;
uint16_t cfa_offset = 0;
// The offset of location of return address from the previous call frame
// address. Rule for unwinding PC: PC_prev = * (SP_prev - ra_offset).
size_t ra_offset = 0;
uint16_t ra_offset = 0;
};
// A small fixed-size cache that maps an address to its CFIRow. An address is
// stored in slot |address % kLimit|, so two addresses with the same remainder
// share a slot and the most recently added one wins. This cache with 500
// entries already gives us 95% hit rate, and fits in a single system page
// (usually 4KiB). Using a thread local cache for each thread gives us 30%
// improvements on performance of heap profiling.
class CFICache {
public:
// Adds a new item to the cache. It replaces any existing item with the same
// hash (i.e. the same |address % kLimit|). Constant time operation.
void Add(uintptr_t address, CFIRow cfi);
// Finds the given address and fills |cfi| with the info for the address.
// Returns true if found, otherwise false. Assumes |address| is never 0:
// unused slots are value-initialized, so their stored address is 0.
bool Find(uintptr_t address, CFIRow* cfi);
private:
// Lets the TestCFICache unit test reach the private members below.
FRIEND_TEST_ALL_PREFIXES(CFIBacktraceAndroidTest, TestCFICache);
// Size is the highest prime which fits the cache in a single system page,
// usually 4KiB. A prime is chosen to make sure addresses are hashed evenly.
static const int kLimit = 509;
// One cache slot: the address that currently owns the slot and its CFI row.
struct AddrAndCFI {
uintptr_t address;
CFIRow cfi;
};
// Slot table indexed by |address % kLimit|; starts out value-initialized.
AddrAndCFI cache_[kLimit] = {};
};
static_assert(sizeof(CFIBacktraceAndroid::CFICache) < 4096,
"The cache does not fit in a single page.");
CFIBacktraceAndroid();
~CFIBacktraceAndroid();
......@@ -86,7 +119,9 @@ class BASE_EXPORT CFIBacktraceAndroid {
// Finds the CFI row for the given |func_addr| in terms of offset from
// the start of the current binary.
bool FindCFIRowForPC(uintptr_t func_addr, CFIRow* out) const;
bool FindCFIRowForPC(uintptr_t func_addr, CFIRow* out);
CFICache* GetThreadLocalCFICache();
// Details about the memory mapped region which contains the libchrome.so
// library file.
......@@ -111,6 +146,8 @@ class BASE_EXPORT CFIBacktraceAndroid {
const uint16_t* unw_data_start_addr_ = nullptr;
bool can_unwind_stack_frames_ = false;
ThreadLocalStorage::Slot thread_local_cfi_cache_;
};
} // namespace trace_event
......
......@@ -91,7 +91,7 @@ TEST(CFIBacktraceAndroidTest, TestFindCFIRow) {
unwinder->ParseCFITables();
CFIBacktraceAndroid::CFIRow cfi_row = {0};
EXPECT_FALSE(unwinder->FindCFIRowForPC(0x00, &cfi_row));
EXPECT_FALSE(unwinder->FindCFIRowForPC(0x01, &cfi_row));
EXPECT_FALSE(unwinder->FindCFIRowForPC(0x100, &cfi_row));
EXPECT_FALSE(unwinder->FindCFIRowForPC(0x1502, &cfi_row));
EXPECT_FALSE(unwinder->FindCFIRowForPC(0x3000, &cfi_row));
......@@ -123,6 +123,73 @@ TEST(CFIBacktraceAndroidTest, TestFindCFIRow) {
EXPECT_EQ(kRow5, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x2210, &cfi_row));
EXPECT_EQ(kRow5, cfi_row);
// Test if cache is used on the future calls to Find, all addresses should
// have different hash. Resetting the memory map to make sure it is never
// accessed in Find().
unwinder->cfi_mmap_.reset(new MemoryMappedFile());
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x1002, &cfi_row));
EXPECT_EQ(kRow1, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x1003, &cfi_row));
EXPECT_EQ(kRow1, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x1008, &cfi_row));
EXPECT_EQ(kRow2, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x1009, &cfi_row));
EXPECT_EQ(kRow2, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x1039, &cfi_row));
EXPECT_EQ(kRow2, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x1080, &cfi_row));
EXPECT_EQ(kRow3, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x1100, &cfi_row));
EXPECT_EQ(kRow3, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x2050, &cfi_row));
EXPECT_EQ(kRow4, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x2208, &cfi_row));
EXPECT_EQ(kRow5, cfi_row);
EXPECT_TRUE(unwinder->FindCFIRowForPC(0x2210, &cfi_row));
EXPECT_EQ(kRow5, cfi_row);
}
// Exercises CFICache directly: lookups on an empty cache, insertion, and
// eviction of the entry that shares a slot (same |address % kLimit|).
TEST(CFIBacktraceAndroidTest, TestCFICache) {
  // ASSERT macros are used inside the loops so that the first failure stops
  // the test instead of printing one failure per iteration.
  CFIBacktraceAndroid::CFICache cache;
  CFIBacktraceAndroid::CFIRow cfi;

  // kLimit is a positive int; convert it once so that the loop bounds below
  // do not mix signed and unsigned operands.
  const uintptr_t limit = static_cast<uintptr_t>(cache.kLimit);

  // Empty cache should not find anything.
  EXPECT_FALSE(cache.Find(1, &cfi));

  // Insert 1 - 2*kLimit. The largest stored offset is 4 * 2 * kLimit, which
  // fits in uint16_t, so the explicit casts below do not lose information.
  for (uintptr_t i = 1; i <= 2 * limit; ++i) {
    const CFIBacktraceAndroid::CFIRow val = {static_cast<uint16_t>(4 * i),
                                             static_cast<uint16_t>(2 * i)};
    cache.Add(i, val);
    ASSERT_TRUE(cache.Find(i, &cfi));
    ASSERT_EQ(cfi, val);

    // Once more than kLimit items were inserted, adding |i| evicts
    // |i - kLimit|, which hashes to the same slot. Address 0 is never probed
    // because Find() does not support it.
    if (i > limit)
      ASSERT_FALSE(cache.Find(i - limit, &cfi));
  }

  // Cache contains kLimit+1 - 2*kLimit.

  // Check that 1 - kLimit cannot be found.
  for (uintptr_t i = 1; i <= limit; ++i) {
    ASSERT_FALSE(cache.Find(i, &cfi));
  }

  // Check if kLimit+1 - 2*kLimit still exists in cache.
  for (uintptr_t i = limit + 1; i <= 2 * limit; ++i) {
    const CFIBacktraceAndroid::CFIRow val = {static_cast<uint16_t>(4 * i),
                                             static_cast<uint16_t>(2 * i)};
    ASSERT_TRUE(cache.Find(i, &cfi));
    ASSERT_EQ(cfi, val);
  }

  // Insert 2*kLimit+1. It shares a slot with kLimit+1 and evicts it.
  const CFIBacktraceAndroid::CFIRow last = {1, 1};
  cache.Add(2 * limit + 1, last);
  EXPECT_TRUE(cache.Find(2 * limit + 1, &cfi));
  EXPECT_EQ(cfi, last);
  EXPECT_FALSE(cache.Find(limit + 1, &cfi));

  // Cache now contains kLimit+2 - 2*kLimit+1.
}
} // namespace trace_event
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment