Commit f99a085f authored by Mike Wittman, committed by Commit Bot

[Sampling profiler] Use cross-platform sampling implementation for Mac

Refactors the Mac sampling to use the StackSamplerImpl implementation,
with the platform-specific aspects moved into ThreadDelegateMac.

Bug: 931418
Change-Id: I8f468fa09b9ddc85921f21563ff41000a5c968f5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1535178
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Leonard Grey <lgrey@chromium.org>
Commit-Queue: Mike Wittman <wittman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#644804}
parent 3792a7df
...@@ -626,6 +626,8 @@ jumbo_component("base") { ...@@ -626,6 +626,8 @@ jumbo_component("base") {
"profiler/stack_sampling_profiler.cc", "profiler/stack_sampling_profiler.cc",
"profiler/stack_sampling_profiler.h", "profiler/stack_sampling_profiler.h",
"profiler/thread_delegate.h", "profiler/thread_delegate.h",
"profiler/thread_delegate_mac.cc",
"profiler/thread_delegate_mac.h",
"profiler/thread_delegate_win.cc", "profiler/thread_delegate_win.cc",
"profiler/thread_delegate_win.h", "profiler/thread_delegate_win.h",
"profiler/unwind_result.h", "profiler/unwind_result.h",
......
...@@ -9,13 +9,29 @@ ...@@ -9,13 +9,29 @@
#ifndef BASE_PROFILER_REGISTER_CONTEXT_H_ #ifndef BASE_PROFILER_REGISTER_CONTEXT_H_
#define BASE_PROFILER_REGISTER_CONTEXT_H_ #define BASE_PROFILER_REGISTER_CONTEXT_H_
#include <type_traits>
#include "build/build_config.h" #include "build/build_config.h"
#if defined(OS_WIN) #if defined(OS_WIN)
#include <windows.h> #include <windows.h>
#elif defined(OS_MACOSX)
#include <mach/machine/thread_status.h>
#endif #endif
// Helper function to account for the fact that platform-specific register state
// types may be unsigned and of the same size as uintptr_t, but not of the same
// type -- e.g. unsigned int vs. unsigned long on 32-bit Windows and unsigned
// long vs. unsigned long long on Mac.
template <typename T>
uintptr_t& AsUintPtr(T* value) {
  // Reject, at compile time, any register type that is signed or whose width
  // differs from uintptr_t; only then is viewing it as uintptr_t meaningful.
  constexpr bool kIsUnsigned = std::is_unsigned<T>::value;
  constexpr bool kSameWidth = sizeof(T) == sizeof(uintptr_t);
  static_assert(kIsUnsigned && kSameWidth,
                "register state type must be equivalent to uintptr_t");

  // Hand back a mutable reference to the very same storage.
  uintptr_t* const reinterpreted = reinterpret_cast<uintptr_t*>(value);
  return *reinterpreted;
}
#if defined(OS_WIN) #if defined(OS_WIN)
using RegisterContext = ::CONTEXT; using RegisterContext = ::CONTEXT;
inline uintptr_t& RegisterContextStackPointer(::CONTEXT* context) { inline uintptr_t& RegisterContextStackPointer(::CONTEXT* context) {
...@@ -24,10 +40,7 @@ inline uintptr_t& RegisterContextStackPointer(::CONTEXT* context) { ...@@ -24,10 +40,7 @@ inline uintptr_t& RegisterContextStackPointer(::CONTEXT* context) {
#elif defined(ARCH_CPU_ARM64) #elif defined(ARCH_CPU_ARM64)
return context->Sp; return context->Sp;
#else #else
// The reinterpret_cast accounts for the fact that Esp is a DWORD, which is an return AsUintPtr(&context->Esp);
// unsigned long, while uintptr_t is an unsigned int. The two types have the
// same representation on Windows, but C++ treats them as different.
return *reinterpret_cast<uintptr_t*>(&context->Esp);
#endif #endif
} }
...@@ -37,13 +50,24 @@ inline uintptr_t& RegisterContextFramePointer(::CONTEXT* context) { ...@@ -37,13 +50,24 @@ inline uintptr_t& RegisterContextFramePointer(::CONTEXT* context) {
#elif defined(ARCH_CPU_ARM64) #elif defined(ARCH_CPU_ARM64)
return context->Fp; return context->Fp;
#else #else
// The reinterpret_cast accounts for the fact that Ebp is a DWORD, which is an return AsUintPtr(&context->Ebp);
// unsigned long, while uintptr_t is an unsigned int. The two types have the
// same representation on Windows, but C++ treats them as different.
return *reinterpret_cast<uintptr_t*>(&context->Ebp);
#endif #endif
} }
#else // #if defined(OS_WIN)
#elif defined(OS_MACOSX) // #if defined(OS_WIN)
// On Mac the register context is the 64-bit x86 Mach thread state.
using RegisterContext = x86_thread_state64_t;

// Returns a mutable reference to the __rsp field of |context|, viewed as a
// uintptr_t.
inline uintptr_t& RegisterContextStackPointer(x86_thread_state64_t* context) {
  auto* const rsp = &context->__rsp;
  return AsUintPtr(rsp);
}

// Returns a mutable reference to the __rbp field of |context|, viewed as a
// uintptr_t.
inline uintptr_t& RegisterContextFramePointer(x86_thread_state64_t* context) {
  auto* const rbp = &context->__rbp;
  return AsUintPtr(rbp);
}
#else // #if defined(OS_WIN)
// Placeholders for other platforms. // Placeholders for other platforms.
struct RegisterContext { struct RegisterContext {
uintptr_t stack_pointer; uintptr_t stack_pointer;
...@@ -57,6 +81,7 @@ inline uintptr_t& RegisterContextStackPointer(RegisterContext* context) { ...@@ -57,6 +81,7 @@ inline uintptr_t& RegisterContextStackPointer(RegisterContext* context) {
inline uintptr_t& RegisterContextFramePointer(RegisterContext* context) { inline uintptr_t& RegisterContextFramePointer(RegisterContext* context) {
return context->frame_pointer; return context->frame_pointer;
} }
#endif // #if defined(OS_WIN) #endif // #if defined(OS_WIN)
#endif // BASE_PROFILER_REGISTER_CONTEXT_H_ #endif // BASE_PROFILER_REGISTER_CONTEXT_H_
...@@ -86,21 +86,24 @@ void StackSamplerImpl::RecordStackFrames(StackBuffer* stack_buffer, ...@@ -86,21 +86,24 @@ void StackSamplerImpl::RecordStackFrames(StackBuffer* stack_buffer,
DCHECK(stack_buffer); DCHECK(stack_buffer);
RegisterContext thread_context; RegisterContext thread_context;
bool success = CopyStack(stack_buffer, profile_builder, &thread_context); uintptr_t stack_top;
bool success =
CopyStack(stack_buffer, &stack_top, profile_builder, &thread_context);
if (!success) if (!success)
return; return;
if (test_delegate_) if (test_delegate_)
test_delegate_->OnPreStackWalk(); test_delegate_->OnPreStackWalk();
profile_builder->OnSampleCompleted(WalkStack(&thread_context)); profile_builder->OnSampleCompleted(WalkStack(&thread_context, stack_top));
} }
// Suspends the thread, copies its stack and register context, and records the // Suspends the thread, copies its stack, top address of the stack copy, and
// current metadata, then resumes the thread. Returns true on success, and // register context, records the current metadata, then resumes the thread.
// returns the copied state via the params. NO HEAP ALLOCATIONS within the // Returns true on success, and returns the copied state via the params. NO HEAP
// ScopedSuspendThread scope. // ALLOCATIONS within the ScopedSuspendThread scope.
bool StackSamplerImpl::CopyStack(StackBuffer* stack_buffer, bool StackSamplerImpl::CopyStack(StackBuffer* stack_buffer,
uintptr_t* stack_top,
ProfileBuilder* profile_builder, ProfileBuilder* profile_builder,
RegisterContext* thread_context) { RegisterContext* thread_context) {
const uintptr_t top = thread_delegate_->GetStackBaseAddress(); const uintptr_t top = thread_delegate_->GetStackBaseAddress();
...@@ -136,6 +139,9 @@ bool StackSamplerImpl::CopyStack(StackBuffer* stack_buffer, ...@@ -136,6 +139,9 @@ bool StackSamplerImpl::CopyStack(StackBuffer* stack_buffer,
stack_buffer->buffer()); stack_buffer->buffer());
} }
*stack_top =
reinterpret_cast<uintptr_t>(stack_buffer->buffer()) + (top - bottom);
for (uintptr_t* reg : for (uintptr_t* reg :
thread_delegate_->GetRegistersToRewrite(thread_context)) { thread_delegate_->GetRegistersToRewrite(thread_context)) {
*reg = RewritePointerIfInOriginalStack(reinterpret_cast<uintptr_t*>(bottom), *reg = RewritePointerIfInOriginalStack(reinterpret_cast<uintptr_t*>(bottom),
...@@ -149,14 +155,16 @@ bool StackSamplerImpl::CopyStack(StackBuffer* stack_buffer, ...@@ -149,14 +155,16 @@ bool StackSamplerImpl::CopyStack(StackBuffer* stack_buffer,
// Walks the stack represented by |thread_context|, recording and returning the // Walks the stack represented by |thread_context|, recording and returning the
// frames. // frames.
std::vector<ProfileBuilder::Frame> StackSamplerImpl::WalkStack( std::vector<ProfileBuilder::Frame> StackSamplerImpl::WalkStack(
RegisterContext* thread_context) { RegisterContext* thread_context,
uintptr_t stack_top) {
std::vector<ProfileBuilder::Frame> stack; std::vector<ProfileBuilder::Frame> stack;
// Reserve enough memory for most stacks, to avoid repeated // Reserve enough memory for most stacks, to avoid repeated
// allocations. Approximately 99.9% of recorded stacks are 128 frames or // allocations. Approximately 99.9% of recorded stacks are 128 frames or
// fewer. // fewer.
stack.reserve(128); stack.reserve(128);
thread_delegate_->WalkNativeFrames(thread_context, module_cache_, &stack); thread_delegate_->WalkNativeFrames(thread_context, stack_top, module_cache_,
&stack);
return stack; return stack;
} }
......
...@@ -34,10 +34,12 @@ class BASE_EXPORT StackSamplerImpl : public StackSampler { ...@@ -34,10 +34,12 @@ class BASE_EXPORT StackSamplerImpl : public StackSampler {
private: private:
bool CopyStack(StackBuffer* stack_buffer, bool CopyStack(StackBuffer* stack_buffer,
uintptr_t* stack_top,
ProfileBuilder* profile_builder, ProfileBuilder* profile_builder,
RegisterContext* thread_context); RegisterContext* thread_context);
std::vector<ProfileBuilder::Frame> WalkStack(RegisterContext* thread_context); std::vector<ProfileBuilder::Frame> WalkStack(RegisterContext* thread_context,
uintptr_t stack_top);
const std::unique_ptr<ThreadDelegate> thread_delegate_; const std::unique_ptr<ThreadDelegate> thread_delegate_;
ModuleCache* const module_cache_; ModuleCache* const module_cache_;
......
...@@ -96,6 +96,7 @@ class TestThreadDelegate : public ThreadDelegate { ...@@ -96,6 +96,7 @@ class TestThreadDelegate : public ThreadDelegate {
UnwindResult WalkNativeFrames( UnwindResult WalkNativeFrames(
RegisterContext* thread_context, RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache, ModuleCache* module_cache,
std::vector<ProfileBuilder::Frame>* stack) override { std::vector<ProfileBuilder::Frame>* stack) override {
if (stack_copy_) { if (stack_copy_) {
......
...@@ -2,494 +2,22 @@ ...@@ -2,494 +2,22 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "base/profiler/stack_sampler.h"
#include <libkern/OSByteOrder.h>
#include <libunwind.h>
#include <mach-o/compact_unwind_encoding.h>
#include <mach-o/getsect.h>
#include <mach-o/swap.h>
#include <mach/kern_return.h>
#include <mach/mach.h>
#include <mach/thread_act.h>
#include <mach/vm_map.h>
#include <pthread.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/syslimits.h>
#include <algorithm>
#include <memory> #include <memory>
#include "base/logging.h" #include "base/profiler/stack_sampler.h"
#include "base/mac/mach_logging.h" #include "base/profiler/stack_sampler_impl.h"
#include "base/macros.h" #include "base/profiler/thread_delegate_mac.h"
#include "base/memory/ptr_util.h"
#include "base/profiler/profile_builder.h"
#include "base/profiler/unwind_result.h"
#include "base/sampling_heap_profiler/module_cache.h"
#include "base/strings/string_number_conversions.h"
extern "C" {
void _sigtramp(int, int, struct sigset*);
}
namespace base { namespace base {
namespace {
// Stack walking --------------------------------------------------------------
// Reads the register context of |target_thread| into |state|. Returns true if
// thread_get_state() reported KERN_SUCCESS.
//
// This is called while a thread is suspended, so it must not use any shared
// resources (e.g. memory allocators).
bool GetThreadState(thread_act_t target_thread, x86_thread_state64_t* state) {
  // thread_get_state() takes the element count as an in/out parameter.
  auto count = static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT);
  const auto status =
      thread_get_state(target_thread, x86_THREAD_STATE64,
                       reinterpret_cast<thread_state_t>(state), &count);
  return status == KERN_SUCCESS;
}
// Maps |pointer| from the original stack to the stack copy. A value inside
// [original_stack_bottom, original_stack_top) is translated to the location at
// the same byte offset from |stack_copy_bottom|; any other value is returned
// unchanged.
//
// This is called while a thread is suspended, so it must not use any shared
// resources (e.g. memory allocators).
uintptr_t RewritePointerIfInOriginalStack(
    const uintptr_t* original_stack_bottom,
    const uintptr_t* original_stack_top,
    uintptr_t* stack_copy_bottom,
    uintptr_t pointer) {
  const uintptr_t range_begin =
      reinterpret_cast<uintptr_t>(original_stack_bottom);
  const uintptr_t range_end = reinterpret_cast<uintptr_t>(original_stack_top);

  // The top of the range is exclusive.
  const bool in_original_stack = pointer >= range_begin && pointer < range_end;
  if (!in_original_stack)
    return pointer;

  const uintptr_t offset = pointer - range_begin;
  return reinterpret_cast<uintptr_t>(stack_copy_bottom) + offset;
}
// Copies the stack to a buffer while rewriting possible pointers to locations
// within the stack to point to the corresponding locations in the copy. This is
// necessary to handle stack frames with dynamic stack allocation, where a
// pointer to the beginning of the dynamic allocation area is stored on the
// stack and/or in a non-volatile register.
//
// Eager rewriting of anything that looks like a pointer to the stack, as done
// in this function, does not adversely affect the stack unwinding. The only
// other values on the stack the unwinding depends on are return addresses,
// which should not point within the stack memory. The rewriting is guaranteed
// to catch all pointers because the stacks are guaranteed by the ABI to be
// sizeof(void*) aligned.
//
// Note that this is called while a thread is suspended. Make very very sure
// that no shared resources (e.g. memory allocators) are used for the duration
// of this function.
void CopyStackAndRewritePointers(uintptr_t* stack_copy_bottom,
                                 const uintptr_t* original_stack_bottom,
                                 const uintptr_t* original_stack_top,
                                 x86_thread_state64_t* thread_state)
    NO_SANITIZE("address") {
  // Copy the stack one word at a time. Any word whose value falls inside the
  // original stack range is redirected to the matching slot in the copy.
  const size_t word_count = original_stack_top - original_stack_bottom;
  size_t index = 0;
  while (index < word_count) {
    stack_copy_bottom[index] = RewritePointerIfInOriginalStack(
        original_stack_bottom, original_stack_top, stack_copy_bottom,
        original_stack_bottom[index]);
    ++index;
  }

  // Apply the same redirection to the registers listed here.
  uint64_t* const registers[] = {&thread_state->__rbx, &thread_state->__rbp,
                                 &thread_state->__rsp, &thread_state->__r12,
                                 &thread_state->__r13, &thread_state->__r14,
                                 &thread_state->__r15};
  for (uint64_t* const reg : registers) {
    *reg = RewritePointerIfInOriginalStack(
        original_stack_bottom, original_stack_top, stack_copy_bottom, *reg);
  }
}
// Returns the "frame offset" field of |compact_unwind_info|. The frame offset
// indicates the location of saved non-volatile registers in relation to the
// frame pointer. See |mach-o/compact_unwind_encoding.h| for details.
uint32_t GetFrameOffset(int compact_unwind_info) {
  // The frame offset lives in bits 16-23. Shift the value down by the number
  // of trailing zero bits in the field mask, then keep only as many low bits as
  // the mask has one bits, i.e. mask with (1 << popcount) - 1. This turns the
  // 0x00FF0000 field into 0x000000FF. Adapted from |EXTRACT_BITS| in
  // libunwind's CompactUnwinder.hpp.
  const int shift = __builtin_ctz(UNWIND_X86_64_RBP_FRAME_OFFSET);
  const int field_mask =
      (1 << __builtin_popcount(UNWIND_X86_64_RBP_FRAME_OFFSET)) - 1;
  return (compact_unwind_info >> shift) & field_mask;
}
// True if the unwind from |leaf_frame_module| may trigger a crash bug in
// unw_init_local. If so, the stack walk should be aborted at the leaf frame.
bool MayTriggerUnwInitLocalCrash(const ModuleCache::Module* leaf_frame_module) {
  // Background: unw_init_local has a bug that, in some unwinds, makes it read
  // memory at the address immediately following the library's address range.
  // When the library is the last of the mapped libraries, that address belongs
  // to a different memory region. Starting with 10.13.4 beta releases that
  // region appears to sometimes be unmapped or mapped without read access, so
  // the read crashes. It's not clear what circumstances lead to this; attempts
  // to reproduce on a 10.13.4 beta did not trigger the issue.
  //
  // Workaround: probe whether that address is readable and, if it is not,
  // report that the stack walk should be aborted before unw_init_local is
  // called. As of 2018/03/19 about 0.1% of non-idle stacks on the UI and GPU
  // main threads have a leaf frame in the last library. Since the issue only
  // appears some of the time, the quantity of lost samples is expected to be
  // lower than 0.1%, possibly significantly lower.
  //
  // TODO(lgrey): Add references above to LLVM/Radar bugs on unw_init_local once
  // filed.
  const auto address_past_module =
      leaf_frame_module->GetBaseAddress() + leaf_frame_module->GetSize();

  // Attempt to read one 64-bit word; any nonzero result means it failed.
  uint64_t unused;
  vm_size_t size = sizeof(unused);
  const auto read_result = vm_read_overwrite(
      current_task(), address_past_module, sizeof(unused),
      reinterpret_cast<vm_address_t>(&unused), &size);
  return read_result != 0;
}
// Returns false if the frame at |unwind_cursor| uses frame pointer unwinding
// and its frame pointer looks invalid; returns true otherwise.
//
// If the stack frame has a frame pointer, stepping the cursor involves indexed
// memory access off of that pointer, so the register is sanity-checked to be
// within bounds first.
//
// Additionally, the frame might be in a prologue or epilogue, which can cause
// a crash when the unwinder attempts to access non-volatile registers that
// have not yet been pushed, or have already been popped from the stack.
// libunwind will try to restore those registers using an offset from the frame
// pointer. However, since the stack is copied from RSP up, any locations below
// the stack pointer are before the beginning of the stack buffer. This is
// handled by requiring the expected location to be at or above the stack
// pointer and rejecting the sample otherwise.
bool HasValidRbp(unw_cursor_t* unwind_cursor, uintptr_t stack_top) {
  unw_proc_info_t proc_info;
  unw_get_proc_info(unwind_cursor, &proc_info);

  // Frames that are not RBP-based need no frame pointer validation.
  const bool uses_frame_pointer =
      (proc_info.format & UNWIND_X86_64_MODE_MASK) ==
      UNWIND_X86_64_MODE_RBP_FRAME;
  if (!uses_frame_pointer)
    return true;

  unw_word_t rsp, rbp;
  unw_get_reg(unwind_cursor, UNW_X86_64_RSP, &rsp);
  unw_get_reg(unwind_cursor, UNW_X86_64_RBP, &rbp);

  // Byte distance from rbp down to the saved registers.
  const uint32_t offset =
      GetFrameOffset(proc_info.format) * sizeof(unw_word_t);
  return rbp >= offset && (rbp - offset) >= rsp && rbp <= stack_top;
}
// Returns the module that contains the address of ptrace(). In debug builds
// this is checked to be non-null and to be named libsystem_kernel.dylib.
const ModuleCache::Module* GetLibSystemKernelModule(ModuleCache* module_cache) {
  const auto ptrace_address = reinterpret_cast<uintptr_t>(ptrace);
  const ModuleCache::Module* module =
      module_cache->GetModuleForAddress(ptrace_address);
  DCHECK(module);
  DCHECK_EQ(FilePath("libsystem_kernel.dylib"), module->GetDebugBasename());
  return module;
}
// Writes the address of |_sigtramp| to |start| and the end address that
// libunwind reports for the function containing it to |end|.
void GetSigtrampRange(uintptr_t* start, uintptr_t* end) {
auto address = reinterpret_cast<uintptr_t>(&_sigtramp);
DCHECK(address != 0);
*start = address;
unw_context_t context;
unw_cursor_t cursor;
unw_proc_info_t info;
// Capture a context for the current thread so there is a valid structure to
// modify below.
unw_getcontext(&context);
// Set the context's RIP to the beginning of sigtramp,
// +1 byte to work around a bug in 10.11 (crbug.com/764468).
context.data[16] = address + 1;
// Ask libunwind for the procedure info at the forged instruction pointer.
unw_init_local(&cursor, &context);
unw_get_proc_info(&cursor, &info);
DCHECK_EQ(info.start_ip, address);
*end = info.end_ip;
}
// ScopedSuspendThread --------------------------------------------------------
// Suspends the thread identified by the Mach port passed to the constructor and
// resumes it on destruction. If the suspension fails, was_successful() returns
// false and the destructor does nothing.
class ScopedSuspendThread {
 public:
  explicit ScopedSuspendThread(mach_port_t thread_port)
      : thread_port_(MACH_PORT_NULL) {
    // Only remember the port if the suspension actually took effect.
    if (thread_suspend(thread_port) == KERN_SUCCESS)
      thread_port_ = thread_port;
  }

  ~ScopedSuspendThread() {
    if (was_successful()) {
      kern_return_t kr = thread_resume(thread_port_);
      MACH_CHECK(kr == KERN_SUCCESS, kr) << "thread_resume";
    }
  }

  bool was_successful() const { return thread_port_ != MACH_PORT_NULL; }

 private:
  // MACH_PORT_NULL when the suspend call failed.
  mach_port_t thread_port_;

  DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
};
} // namespace
// StackSamplerMac ------------------------------------------------------
// StackSampler implementation that samples the thread identified by a Mach
// port.
class StackSamplerMac : public StackSampler {
public:
StackSamplerMac(mach_port_t thread_port,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate);
~StackSamplerMac() override;
// StackSampler:
void RecordStackFrames(StackBuffer* stack_buffer,
ProfileBuilder* profile_builder) override;
private:
// Suspends the thread with |thread_port|, copies its stack, register
// context, and current metadata and resumes the thread. On success, stores in
// |stack_top| the address just past the copied stack data within
// |stack_buffer|. Returns true on success.
static bool CopyStack(mach_port_t thread_port,
const void* base_address,
StackBuffer* stack_buffer,
ProfileBuilder* profile_builder,
x86_thread_state64_t* thread_state,
uintptr_t* stack_top);
// Walks the stack represented by |thread_state|, writing frames to |stack|.
UnwindResult WalkStack(const x86_thread_state64_t& thread_state,
uintptr_t stack_top,
std::vector<ProfileBuilder::Frame>* stack);
// Weak reference: Mach port for thread being profiled.
mach_port_t thread_port_;
// Maps a module's address range to the module.
ModuleCache* const module_cache_;
// Optional; when non-null, OnPreStackWalk() is called on it between copying
// the stack and walking it.
StackSamplerTestDelegate* const test_delegate_;
// The stack base address corresponding to |thread_port_|.
const void* const thread_stack_base_address_;
// Cached pointer to the libsystem_kernel module.
const ModuleCache::Module* const libsystem_kernel_module_;
// The address range of |_sigtramp|, the signal trampoline function.
uintptr_t sigtramp_start_;
uintptr_t sigtramp_end_;
DISALLOW_COPY_AND_ASSIGN(StackSamplerMac);
};
// Stores the constructor arguments and caches the stack base address of the
// thread behind |thread_port|, the libsystem_kernel module, and the address
// range of |_sigtramp|.
StackSamplerMac::StackSamplerMac(mach_port_t thread_port,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate)
: thread_port_(thread_port),
module_cache_(module_cache),
test_delegate_(test_delegate),
thread_stack_base_address_(
pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))),
libsystem_kernel_module_(GetLibSystemKernelModule(module_cache)) {
GetSigtrampRange(&sigtramp_start_, &sigtramp_end_);
// This class suspends threads, and those threads might be suspended in dyld.
// Therefore, for all the system functions that might be linked in dynamically
// that are used while threads are suspended, make calls to them to make sure
// that they are linked up.
// The state read here is discarded; the call exists only for that purpose.
x86_thread_state64_t thread_state;
GetThreadState(thread_port_, &thread_state);
}
// Nothing to release: the destructor body is empty.
StackSamplerMac::~StackSamplerMac() = default;
// Copies the target thread's stack into |stack_buffer|, walks the copy, and
// hands the resulting frames to |profile_builder|. Does nothing further if the
// stack copy fails.
void StackSamplerMac::RecordStackFrames(StackBuffer* stack_buffer,
                                        ProfileBuilder* profile_builder) {
  x86_thread_state64_t thread_state;
  uintptr_t stack_top;
  if (!CopyStack(thread_port_, thread_stack_base_address_, stack_buffer,
                 profile_builder, &thread_state, &stack_top)) {
    return;
  }

  if (test_delegate_)
    test_delegate_->OnPreStackWalk();

  // Pre-size the frame vector so that most walks need no reallocation:
  // approximately 99.9% of recorded stacks are 128 frames or fewer.
  std::vector<ProfileBuilder::Frame> frames;
  frames.reserve(128);

  // The walk result is not used; whatever frames were collected are recorded.
  WalkStack(thread_state, stack_top, &frames);
  profile_builder->OnSampleCompleted(std::move(frames));
}
// Suspends the thread behind |thread_port|, reads its register state into
// |thread_state|, records metadata via |profile_builder|, and copies the stack
// between the thread's stack pointer and |base_address| into |stack_buffer|,
// rewriting pointers into the original stack to point into the copy. On
// success, stores in |stack_top| the address just past the copied data in the
// buffer. Returns false if the thread cannot be suspended, its state cannot be
// read, the stack pointer is not below |base_address|, or the stack is larger
// than stack_buffer->size().
// static
bool StackSamplerMac::CopyStack(mach_port_t thread_port,
const void* base_address,
StackBuffer* stack_buffer,
ProfileBuilder* profile_builder,
x86_thread_state64_t* thread_state,
uintptr_t* stack_top) {
// IMPORTANT NOTE: Do not do ANYTHING in this scope that might
// allocate memory, including indirectly via use of DCHECK/CHECK or other
// logging statements. Otherwise this code can deadlock on heap locks acquired
// by the target thread before it was suspended.
ScopedSuspendThread suspend_thread(thread_port);
if (!suspend_thread.was_successful())
return false;
if (!GetThreadState(thread_port, thread_state))
return false;
// The region to copy runs from the current stack pointer up to the stack
// base address.
auto top = reinterpret_cast<uintptr_t>(base_address);
uintptr_t bottom = thread_state->__rsp;
if (bottom >= top)
return false;
uintptr_t stack_size = top - bottom;
if (stack_size > stack_buffer->size())
return false;
profile_builder->RecordMetadata();
CopyStackAndRewritePointers(stack_buffer->buffer(),
reinterpret_cast<uintptr_t*>(bottom),
reinterpret_cast<uintptr_t*>(top), thread_state);
*stack_top = reinterpret_cast<uintptr_t>(stack_buffer->buffer()) + stack_size;
return true;
}
// Walks the stack described by |thread_state| using libunwind, appending one
// frame per visited instruction pointer to |stack|. |stack_top| is passed to
// HasValidRbp() as the upper bound for frame pointers.
//
// Returns ABORTED when the walk is cut short (possible unw_init_local crash, a
// frame in sigtramp, or an invalid-looking rbp), UNRECOGNIZED_FRAME when the
// instruction pointer is not inside any module known to the module cache, and
// COMPLETED once unw_step() no longer returns a positive value.
UnwindResult StackSamplerMac::WalkStack(
const x86_thread_state64_t& thread_state,
uintptr_t stack_top,
std::vector<ProfileBuilder::Frame>* stack) {
// There isn't an official way to create a unw_context other than to create it
// from the current state of the current thread's stack. Since we're walking a
// different thread's stack we must forge a context. The unw_context is just a
// copy of the 16 main registers followed by the instruction pointer, nothing
// more. Coincidentally, the first 17 items of the x86_thread_state64_t type
// are exactly those registers in exactly the same order, so just bulk copy
// them over.
unw_context_t unwind_context;
memcpy(&unwind_context, &thread_state, sizeof(uintptr_t) * 17);
// Avoid an out-of-bounds read bug in libunwind that can crash us in some
// circumstances. If we're subject to that case, just record the first frame
// and bail. See MayTriggerUnwInitLocalCrash for details.
const ModuleCache::Module* leaf_frame_module =
module_cache_->GetModuleForAddress(thread_state.__rip);
if (leaf_frame_module && MayTriggerUnwInitLocalCrash(leaf_frame_module)) {
stack->emplace_back(thread_state.__rip, leaf_frame_module);
return UnwindResult::ABORTED;
}
unw_cursor_t unwind_cursor;
unw_init_local(&unwind_cursor, &unwind_context);
// True only during the first loop iteration, i.e. for the leaf frame.
bool at_top_frame = true;
int step_result;
do {
unw_word_t instruction_pointer;
unw_get_reg(&unwind_cursor, UNW_REG_IP, &instruction_pointer);
const ModuleCache::Module* module =
module_cache_->GetModuleForAddress(instruction_pointer);
if (!module) {
// There's no loaded module containing the instruction pointer. This is
// due to either executing code that is not in a module (e.g. V8
// runtime-generated code), or to a previous bad unwind.
//
// The bad unwind scenario can occur in frameless (non-DWARF) unwinding,
// which works by fetching the function's stack size from the unwind
// encoding or stack, and adding it to the stack pointer to determine the
// function's return address.
//
// If we're in a function prologue or epilogue, the actual stack size may
// be smaller than it will be during the normal course of execution. When
// libunwind adds the expected stack size, it will look for the return
// address in the wrong place. This check ensures we don't continue trying
// to unwind using the resulting bad IP value.
//
// We return UNRECOGNIZED_FRAME on the optimistic assumption that this may
// be a frame the AuxUnwinder knows how to handle (e.g. a frame in V8
// generated code).
return UnwindResult::UNRECOGNIZED_FRAME;
}
// Record the frame.
stack->emplace_back(instruction_pointer, module);
// Don't continue if we're in sigtramp. Unwinding this from another thread
// is very fragile. It's a complex DWARF unwind that needs to restore the
// entire thread context which was saved by the kernel when the interrupt
// occurred.
if (instruction_pointer >= sigtramp_start_ &&
instruction_pointer < sigtramp_end_) {
return UnwindResult::ABORTED;
}
// Don't continue if rbp appears to be invalid (due to a previous bad
// unwind).
if (!HasValidRbp(&unwind_cursor, stack_top))
return UnwindResult::ABORTED;
step_result = unw_step(&unwind_cursor);
if (step_result == 0 && at_top_frame) {
// libunwind is designed to be triggered by user code on their own thread.
// If it hits a library that has no unwind info for the function that is
// being executed, it just stops. This isn't a problem in the normal case,
// but in this case, it's quite possible that the stack being walked is
// stopped in a function that bridges to the kernel and thus is missing
// the unwind info.
// For now, just unwind the single case where the thread is stopped in a
// function in libsystem_kernel.
uint64_t& rsp = unwind_context.data[7];
uint64_t& rip = unwind_context.data[16];
if (module_cache_->GetModuleForAddress(rip) == libsystem_kernel_module_) {
// Step manually: take the word at the stack pointer as the new
// instruction pointer, then move the stack pointer past it.
rip = *reinterpret_cast<uint64_t*>(rsp);
rsp += 8;
// Reset the cursor.
unw_init_local(&unwind_cursor, &unwind_context);
// Mock a successful step_result.
step_result = 1;
}
}
at_top_frame = false;
} while (step_result > 0);
return UnwindResult::COMPLETED;
}
// StackSampler ---------------------------------------------------------
// static // static
std::unique_ptr<StackSampler> StackSampler::Create( std::unique_ptr<StackSampler> StackSampler::Create(
PlatformThreadId thread_id, PlatformThreadId thread_id,
ModuleCache* module_cache, ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate) { StackSamplerTestDelegate* test_delegate) {
return std::make_unique<StackSamplerMac>(thread_id, module_cache, return std::make_unique<StackSamplerImpl>(
test_delegate); std::make_unique<ThreadDelegateMac>(thread_id, module_cache),
module_cache, test_delegate);
} }
// static // static
......
...@@ -72,6 +72,7 @@ class BASE_EXPORT ThreadDelegate { ...@@ -72,6 +72,7 @@ class BASE_EXPORT ThreadDelegate {
// TODO(wittman): Move the unwinding support into a separate UnwindDelegate. // TODO(wittman): Move the unwinding support into a separate UnwindDelegate.
virtual UnwindResult WalkNativeFrames( virtual UnwindResult WalkNativeFrames(
RegisterContext* thread_context, RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache, ModuleCache* module_cache,
std::vector<ProfileBuilder::Frame>* stack) = 0; std::vector<ProfileBuilder::Frame>* stack) = 0;
}; };
......
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/thread_delegate_mac.h"
#include <libkern/OSByteOrder.h>
#include <libunwind.h>
#include <mach-o/compact_unwind_encoding.h>
#include <mach-o/getsect.h>
#include <mach-o/swap.h>
#include <mach/kern_return.h>
#include <mach/mach.h>
#include <mach/thread_act.h>
#include <mach/vm_map.h>
#include <pthread.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/syslimits.h>
#include <algorithm>
#include <memory>
#include "base/logging.h"
#include "base/mac/mach_logging.h"
#include "base/macros.h"
#include "base/memory/ptr_util.h"
#include "base/profiler/profile_builder.h"
#include "base/sampling_heap_profiler/module_cache.h"
#include "base/strings/string_number_conversions.h"
// IMPORTANT NOTE: Some functions within this implementation are invoked while
// the target thread is suspended, so they must not do any allocation from the
// heap, including indirectly via use of DCHECK/CHECK or other logging
// statements. Otherwise this code can deadlock on heap locks acquired by the
// target thread before it was suspended. These functions are commented with "NO
// HEAP ALLOCATIONS".
extern "C" {
void _sigtramp(int, int, struct sigset*);
}
namespace base {
namespace {
// Reads the register context of |target_thread| into |state|. Returns true if
// thread_get_state() reported KERN_SUCCESS. NO HEAP ALLOCATIONS.
bool GetThreadState(thread_act_t target_thread, x86_thread_state64_t* state) {
  // thread_get_state() takes the element count as an in/out parameter.
  auto count = static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT);
  const auto status =
      thread_get_state(target_thread, x86_THREAD_STATE64,
                       reinterpret_cast<thread_state_t>(state), &count);
  return status == KERN_SUCCESS;
}
// Returns the "frame offset" field of |compact_unwind_info|. The frame offset
// indicates the location of saved non-volatile registers in relation to the
// frame pointer. See |mach-o/compact_unwind_encoding.h| for details.
uint32_t GetFrameOffset(int compact_unwind_info) {
  // The frame offset lives in bits 16-23. Shift the value down by the number
  // of trailing zero bits in the field mask, then keep only as many low bits as
  // the mask has one bits, i.e. mask with (1 << popcount) - 1. This turns the
  // 0x00FF0000 field into 0x000000FF. Adapted from |EXTRACT_BITS| in
  // libunwind's CompactUnwinder.hpp.
  const int shift = __builtin_ctz(UNWIND_X86_64_RBP_FRAME_OFFSET);
  const int field_mask =
      (1 << __builtin_popcount(UNWIND_X86_64_RBP_FRAME_OFFSET)) - 1;
  return (compact_unwind_info >> shift) & field_mask;
}
// True if the unwind from |leaf_frame_module| may trigger a crash bug in
// unw_init_local. If so, the stack walk should be aborted at the leaf frame.
bool MayTriggerUnwInitLocalCrash(const ModuleCache::Module* leaf_frame_module) {
  // Background: unw_init_local has a bug that, in some unwinds, makes it read
  // memory at the address immediately following the library's address range.
  // When the library is the last of the mapped libraries, that address belongs
  // to a different memory region. Starting with 10.13.4 beta releases that
  // region appears to sometimes be unmapped or mapped without read access, so
  // the read crashes. It's not clear what circumstances lead to this; attempts
  // to reproduce on a 10.13.4 beta did not trigger the issue.
  //
  // Workaround: probe whether that address is readable and, if it is not,
  // report that the stack walk should be aborted before unw_init_local is
  // called. As of 2018/03/19 about 0.1% of non-idle stacks on the UI and GPU
  // main threads have a leaf frame in the last library. Since the issue only
  // appears some of the time, the quantity of lost samples is expected to be
  // lower than 0.1%, possibly significantly lower.
  //
  // TODO(lgrey): Add references above to LLVM/Radar bugs on unw_init_local once
  // filed.
  const auto address_past_module =
      leaf_frame_module->GetBaseAddress() + leaf_frame_module->GetSize();

  // Attempt to read one 64-bit word; any nonzero result means it failed.
  uint64_t unused;
  vm_size_t size = sizeof(unused);
  const auto read_result = vm_read_overwrite(
      current_task(), address_past_module, sizeof(unused),
      reinterpret_cast<vm_address_t>(&unused), &size);
  return read_result != 0;
}
// Returns whether the frame at |unwind_cursor| has a plausible frame pointer
// for a frame-pointer-based unwind. Frames whose unwind encoding is not
// RBP-based are always reported as valid.
//
// For an RBP-based frame, stepping the cursor involves indexed memory access
// off of the frame pointer, so the register is sanity-checked to ensure it is
// within bounds.
//
// Additionally, the frame might be in a prologue or epilogue, which can cause
// a crash when the unwinder attempts to access non-volatile registers that
// have not yet been pushed, or have already been popped from the stack.
// libunwind will try to restore those registers using an offset from the
// frame pointer. However, since the stack is copied from RSP up, any location
// below the stack pointer is before the beginning of the stack buffer. This is
// accounted for by checking that the expected location is at or above the
// stack pointer, and rejecting the sample if it isn't.
bool HasValidRbp(unw_cursor_t* unwind_cursor, uintptr_t stack_top) {
  unw_proc_info_t proc_info;
  unw_get_proc_info(unwind_cursor, &proc_info);

  const bool is_rbp_frame = (proc_info.format & UNWIND_X86_64_MODE_MASK) ==
                            UNWIND_X86_64_MODE_RBP_FRAME;
  if (!is_rbp_frame)
    return true;

  unw_word_t rsp, rbp;
  unw_get_reg(unwind_cursor, UNW_X86_64_RSP, &rsp);
  unw_get_reg(unwind_cursor, UNW_X86_64_RBP, &rbp);

  // Byte distance below rbp at which the saved registers are expected.
  const uint32_t offset = GetFrameOffset(proc_info.format) * sizeof(unw_word_t);

  // The saved-register location must not underflow, must not fall below rsp,
  // and rbp itself must not be beyond the top of the stack.
  return rbp >= offset && (rbp - offset) >= rsp && rbp <= stack_top;
}
// Looks up, via |module_cache|, the module that contains the address of
// ptrace(). Debug builds verify that a module was found and that its debug
// basename is "libsystem_kernel.dylib".
const ModuleCache::Module* GetLibSystemKernelModule(ModuleCache* module_cache) {
  const auto ptrace_address = reinterpret_cast<uintptr_t>(ptrace);
  const ModuleCache::Module* module =
      module_cache->GetModuleForAddress(ptrace_address);
  DCHECK(module);
  DCHECK_EQ(FilePath("libsystem_kernel.dylib"), module->GetDebugBasename());
  return module;
}
// Writes the address range of the |_sigtramp| signal trampoline to |*start|
// and |*end|. |*start| is the address of the symbol itself; |*end| is the
// |end_ip| that libunwind reports for the procedure found at that address.
void GetSigtrampRange(uintptr_t* start, uintptr_t* end) {
auto address = reinterpret_cast<uintptr_t>(&_sigtramp);
DCHECK(address != 0);
*start = address;
unw_context_t context;
unw_cursor_t cursor;
unw_proc_info_t info;
// Capture the current thread's context only to have an initialized
// unw_context_t to modify; its instruction pointer slot is overwritten below.
unw_getcontext(&context);
// Set the context's RIP to the beginning of sigtramp,
// +1 byte to work around a bug in 10.11 (crbug.com/764468).
context.data[16] = address + 1;
// NOTE(review): the return values of unw_init_local and unw_get_proc_info are
// not checked here; only the DCHECK below validates the result.
unw_init_local(&cursor, &context);
unw_get_proc_info(&cursor, &info);
DCHECK_EQ(info.start_ip, address);
*end = info.end_ip;
}
} // namespace
// ScopedSuspendThread --------------------------------------------------------
// Attempts to suspend the thread identified by |thread_port|. On success the
// port is remembered so the destructor can resume the thread; on failure
// |thread_port_| stays MACH_PORT_NULL, which WasSuccessful() reports as false.
// NO HEAP ALLOCATIONS after thread_suspend.
ThreadDelegateMac::ScopedSuspendThread::ScopedSuspendThread(
    mach_port_t thread_port)
    : thread_port_(MACH_PORT_NULL) {
  if (thread_suspend(thread_port) == KERN_SUCCESS)
    thread_port_ = thread_port;
}
// Resumes the thread if, and only if, the constructor managed to suspend it.
// NO HEAP ALLOCATIONS. The MACH_CHECK is OK because it provides a more noisy
// failure mode than deadlocking.
ThreadDelegateMac::ScopedSuspendThread::~ScopedSuspendThread() {
  if (WasSuccessful()) {
    kern_return_t kr = thread_resume(thread_port_);
    MACH_CHECK(kr == KERN_SUCCESS, kr) << "thread_resume";
  }
}
// Returns true if the constructor's thread_suspend() call succeeded, which is
// recorded by |thread_port_| holding a non-null port.
bool ThreadDelegateMac::ScopedSuspendThread::WasSuccessful() const {
return thread_port_ != MACH_PORT_NULL;
}
// ThreadDelegateMac ----------------------------------------------------------
// Caches the data that GetStackBaseAddress() and WalkNativeFrames() rely on:
// the stack base address of the thread behind |thread_port|, the
// libsystem_kernel module looked up through |module_cache|, and the address
// range of the signal trampoline.
ThreadDelegateMac::ThreadDelegateMac(mach_port_t thread_port,
ModuleCache* module_cache)
: thread_port_(thread_port),
thread_stack_base_address_(reinterpret_cast<uintptr_t>(
pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port)))),
libsystem_kernel_module_(GetLibSystemKernelModule(module_cache)) {
GetSigtrampRange(&sigtramp_start_, &sigtramp_end_);
// This class suspends threads, and those threads might be suspended in dyld.
// Therefore, for all the system functions that might be linked in dynamically
// that are used while threads are suspended, make calls to them to make sure
// that they are linked up.
// The state read here is discarded; only the call itself matters.
x86_thread_state64_t thread_state;
GetThreadState(thread_port_, &thread_state);
}
// Defaulted: the destructor performs no explicit cleanup.
ThreadDelegateMac::~ThreadDelegateMac() = default;
// Creates a ScopedSuspendThread for |thread_port_|. The object is
// heap-allocated here; its constructor is what attempts the suspension, so
// callers should check WasSuccessful() on the result.
std::unique_ptr<ThreadDelegate::ScopedSuspendThread>
ThreadDelegateMac::CreateScopedSuspendThread() {
return std::make_unique<ScopedSuspendThread>(thread_port_);
}
// Fills |thread_context| with the register state of the thread identified by
// |thread_port_|. Returns false if the state could not be read.
// NO HEAP ALLOCATIONS.
bool ThreadDelegateMac::GetThreadContext(x86_thread_state64_t* thread_context) {
return GetThreadState(thread_port_, thread_context);
}
// Returns the stack base address that was computed once in the constructor
// from pthread_get_stackaddr_np().
// NO HEAP ALLOCATIONS.
uintptr_t ThreadDelegateMac::GetStackBaseAddress() const {
return thread_stack_base_address_;
}
// Always returns true on Mac; |stack_pointer| is not inspected.
// NO HEAP ALLOCATIONS.
bool ThreadDelegateMac::CanCopyStack(uintptr_t stack_pointer) {
return true;
}
// Returns pointers to the rbx, rbp, rsp, r12, r13, r14 and r15 slots of
// |thread_context|, in that order. The pointers alias |thread_context|, so
// they are only valid while it is.
std::vector<uintptr_t*> ThreadDelegateMac::GetRegistersToRewrite(
    x86_thread_state64_t* thread_context) {
  x86_thread_state64_t& context = *thread_context;
  return {&AsUintPtr(&context.__rbx), &AsUintPtr(&context.__rbp),
          &AsUintPtr(&context.__rsp), &AsUintPtr(&context.__r12),
          &AsUintPtr(&context.__r13), &AsUintPtr(&context.__r14),
          &AsUintPtr(&context.__r15)};
}
// Walks native frames with libunwind, starting from the register state in
// |thread_context|, and appends one entry per recognized frame to |stack|.
// |stack_top| bounds the frame pointer sanity check and |module_cache| maps
// instruction pointers to modules.
//
// Returns:
//   - ABORTED if the leaf module may trigger the unw_init_local crash (the
//     leaf frame is still recorded), if the walk reaches the signal
//     trampoline, or if the frame pointer looks invalid.
//   - UNRECOGNIZED_FRAME if an instruction pointer is not inside any module
//     known to |module_cache|; that frame is not recorded.
//   - COMPLETED once unw_step() reports that no further step was made.
UnwindResult ThreadDelegateMac::WalkNativeFrames(
x86_thread_state64_t* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<ProfileBuilder::Frame>* stack) {
// There isn't an official way to create a unw_context other than to create it
// from the current state of the current thread's stack. Since we're walking a
// different thread's stack we must forge a context. The unw_context is just a
// copy of the 16 main registers followed by the instruction pointer, nothing
// more. Coincidentally, the first 17 items of the x86_thread_state64_t type
// are exactly those registers in exactly the same order, so just bulk copy
// them over.
unw_context_t unwind_context;
memcpy(&unwind_context, thread_context, sizeof(uintptr_t) * 17);
// Avoid an out-of-bounds read bug in libunwind that can crash us in some
// circumstances. If we're subject to that case, just record the first frame
// and bail. See MayTriggerUnwInitLocalCrash for details.
const ModuleCache::Module* leaf_frame_module =
module_cache->GetModuleForAddress(thread_context->__rip);
if (leaf_frame_module && MayTriggerUnwInitLocalCrash(leaf_frame_module)) {
stack->emplace_back(thread_context->__rip, leaf_frame_module);
return UnwindResult::ABORTED;
}
// NOTE(review): the return value of unw_init_local is not checked.
unw_cursor_t unwind_cursor;
unw_init_local(&unwind_cursor, &unwind_context);
// True only during the first loop iteration, i.e. for the leaf frame.
bool at_top_frame = true;
int step_result;
do {
unw_word_t instruction_pointer;
unw_get_reg(&unwind_cursor, UNW_REG_IP, &instruction_pointer);
const ModuleCache::Module* module =
module_cache->GetModuleForAddress(instruction_pointer);
if (!module) {
// There's no loaded module containing the instruction pointer. This is
// due to either executing code that is not in a module (e.g. V8
// runtime-generated code), or to a previous bad unwind.
//
// The bad unwind scenario can occur in frameless (non-DWARF) unwinding,
// which works by fetching the function's stack size from the unwind
// encoding or stack, and adding it to the stack pointer to determine the
// function's return address.
//
// If we're in a function prologue or epilogue, the actual stack size may
// be smaller than it will be during the normal course of execution. When
// libunwind adds the expected stack size, it will look for the return
// address in the wrong place. This check ensures we don't continue trying
// to unwind using the resulting bad IP value.
//
// We return UNRECOGNIZED_FRAME on the optimistic assumption that this may
// be a frame the AuxUnwinder knows how to handle (e.g. a frame in V8
// generated code).
return UnwindResult::UNRECOGNIZED_FRAME;
}
// Record the frame.
stack->emplace_back(instruction_pointer, module);
// Don't continue if we're in sigtramp. Unwinding this from another thread
// is very fragile. It's a complex DWARF unwind that needs to restore the
// entire thread context which was saved by the kernel when the interrupt
// occurred.
if (instruction_pointer >= sigtramp_start_ &&
instruction_pointer < sigtramp_end_) {
return UnwindResult::ABORTED;
}
// Don't continue if rbp appears to be invalid (due to a previous bad
// unwind).
if (!HasValidRbp(&unwind_cursor, stack_top))
return UnwindResult::ABORTED;
step_result = unw_step(&unwind_cursor);
if (step_result == 0 && at_top_frame) {
// libunwind is designed to be triggered by user code on their own thread,
// if it hits a library that has no unwind info for the function that is
// being executed, it just stops. This isn't a problem in the normal case,
// but in this case, it's quite possible that the stack being walked is
// stopped in a function that bridges to the kernel and thus is missing
// the unwind info.
// For now, just unwind the single case where the thread is stopped in a
// function in libsystem_kernel.
// Slots 7 and 16 of the forged context hold rsp and rip respectively.
uint64_t& rsp = unwind_context.data[7];
uint64_t& rip = unwind_context.data[16];
if (module_cache->GetModuleForAddress(rip) == libsystem_kernel_module_) {
// Manually pop the return address: read it from the word at rsp, then
// move rsp past that word.
rip = *reinterpret_cast<uint64_t*>(rsp);
rsp += 8;
// Reset the cursor.
unw_init_local(&unwind_cursor, &unwind_context);
// Mock a successful step_result.
step_result = 1;
}
}
at_top_frame = false;
// NOTE(review): a negative |step_result| also ends the loop and is reported
// as COMPLETED below - confirm that this is intended.
} while (step_result > 0);
return UnwindResult::COMPLETED;
}
} // namespace base
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_THREAD_DELEGATE_MAC_H_
#define BASE_PROFILER_THREAD_DELEGATE_MAC_H_
#include <mach/mach.h>
#include "base/base_export.h"
#include "base/profiler/thread_delegate.h"
#include "base/sampling_heap_profiler/module_cache.h"
#include "base/threading/platform_thread.h"
namespace base {
// Platform- and thread-specific implementation in support of stack sampling on
// Mac.
class BASE_EXPORT ThreadDelegateMac : public ThreadDelegate {
public:
// Suspends the thread for the lifetime of the object: the constructor attempts
// the suspension and the destructor resumes the thread if it succeeded.
class ScopedSuspendThread : public ThreadDelegate::ScopedSuspendThread {
public:
explicit ScopedSuspendThread(mach_port_t thread_port);
~ScopedSuspendThread() override;
ScopedSuspendThread(const ScopedSuspendThread&) = delete;
ScopedSuspendThread& operator=(const ScopedSuspendThread&) = delete;
// Returns true if the thread was actually suspended by the constructor.
bool WasSuccessful() const override;
private:
// The suspended thread's port, or MACH_PORT_NULL if suspension failed.
mach_port_t thread_port_;
};
// |thread_port| identifies the thread to be sampled. |module_cache| is used
// during construction to look up the libsystem_kernel module.
ThreadDelegateMac(mach_port_t thread_port, ModuleCache* module_cache);
~ThreadDelegateMac() override;
ThreadDelegateMac(const ThreadDelegateMac&) = delete;
ThreadDelegateMac& operator=(const ThreadDelegateMac&) = delete;
// ThreadDelegate
std::unique_ptr<ThreadDelegate::ScopedSuspendThread>
CreateScopedSuspendThread() override;
bool GetThreadContext(x86_thread_state64_t* thread_context) override;
uintptr_t GetStackBaseAddress() const override;
bool CanCopyStack(uintptr_t stack_pointer) override;
std::vector<uintptr_t*> GetRegistersToRewrite(
x86_thread_state64_t* thread_context) override;
UnwindResult WalkNativeFrames(
x86_thread_state64_t* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<ProfileBuilder::Frame>* stack) override;
private:
// Weak reference: Mach port for thread being profiled.
mach_port_t thread_port_;
// The stack base address corresponding to |thread_port_|.
const uintptr_t thread_stack_base_address_;
// Cached pointer to the libsystem_kernel module.
const ModuleCache::Module* const libsystem_kernel_module_;
// The address range of |_sigtramp|, the signal trampoline function.
// WalkNativeFrames() treats it as the half-open range
// [sigtramp_start_, sigtramp_end_).
uintptr_t sigtramp_start_;
uintptr_t sigtramp_end_;
};
} // namespace base
#endif // BASE_PROFILER_THREAD_DELEGATE_MAC_H_
...@@ -202,6 +202,7 @@ std::vector<uintptr_t*> ThreadDelegateWin::GetRegistersToRewrite( ...@@ -202,6 +202,7 @@ std::vector<uintptr_t*> ThreadDelegateWin::GetRegistersToRewrite(
UnwindResult ThreadDelegateWin::WalkNativeFrames( UnwindResult ThreadDelegateWin::WalkNativeFrames(
CONTEXT* thread_context, CONTEXT* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache, ModuleCache* module_cache,
std::vector<ProfileBuilder::Frame>* stack) { std::vector<ProfileBuilder::Frame>* stack) {
Win32StackFrameUnwinder frame_unwinder; Win32StackFrameUnwinder frame_unwinder;
......
...@@ -49,6 +49,7 @@ class BASE_EXPORT ThreadDelegateWin : public ThreadDelegate { ...@@ -49,6 +49,7 @@ class BASE_EXPORT ThreadDelegateWin : public ThreadDelegate {
UnwindResult WalkNativeFrames( UnwindResult WalkNativeFrames(
CONTEXT* thread_context, CONTEXT* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache, ModuleCache* module_cache,
std::vector<ProfileBuilder::Frame>* stack) override; std::vector<ProfileBuilder::Frame>* stack) override;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment