Commit da7a0571 authored by Alexei Filippov, committed by Commit Bot

[heap profiler] Use the base::SamplingHeapProfiler helper for memlog.

Extend the implementation of base::SamplingHeapProfiler to support:
  * PSEUDO and MIXED call stack modes
  * Allocation context recording
  * Thread name recording

Switch SamplingProfilerWrapper to use base::SamplingHeapProfiler for the
in-process mode.

TBR=dcheng@chromium.org
BUG=923459

Change-Id: I9783a9c60f287332d5fb7410fbe672ee83010ece
Reviewed-on: https://chromium-review.googlesource.com/c/1492951
Commit-Queue: Alexei Filippov <alph@chromium.org>
Reviewed-by: Erik Chen <erikchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#636953}
parent a9029142
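
For orientation before the diff: a hedged sketch of how a client might drive the
profiler through the API this change extends (Init/Get/Start/Stop,
SetSamplingInterval, SetRecordThreadNames, GetSamples, GetStrings, all declared
in the updated header below). The function name and the 128 KiB interval are
made up for illustration, and the snippet builds only inside Chromium:

    void ProfileWorkloadSketch() {
      base::SamplingHeapProfiler::Init();
      auto* profiler = base::SamplingHeapProfiler::Get();
      profiler->SetSamplingInterval(128 * 1024);  // Mean interval, in bytes.
      profiler->SetRecordThreadNames(true);
      uint32_t profile_id = profiler->Start();
      // ... workload runs; live allocations are sampled ...
      // Pass the id from Start() for this session's samples, or 0 for all.
      std::vector<base::SamplingHeapProfiler::Sample> samples =
          profiler->GetSamples(profile_id);
      // Strings referenced from PSEUDO/MIXED-mode stacks.
      std::vector<const char*> strings = profiler->GetStrings();
      profiler->Stop();
    }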
@@ -50,6 +50,7 @@ class BASE_EXPORT PoissonAllocationSampler {
   // within the object scope for the current thread.
   // It allows observers to allocate/deallocate memory while holding a lock
   // without a chance to get into reentrancy problems.
+  // The current implementation doesn't support ScopedMuteThreadSamples nesting.
   class BASE_EXPORT ScopedMuteThreadSamples {
    public:
     ScopedMuteThreadSamples();
......
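
The scope above is the reentrancy guard the sampler relies on while notifying
observers. Below is a minimal self-contained sketch of the technique with
invented names, not the Chromium implementation: a thread-local flag that
allocation hooks consult before recording, so allocations made under the guard
are not re-sampled.

    #include <cassert>

    namespace {
    thread_local bool g_thread_muted = false;
    }  // namespace

    class ScopedMuteThreadSamplesSketch {
     public:
      ScopedMuteThreadSamplesSketch() {
        // Nesting is unsupported, matching the comment added above.
        assert(!g_thread_muted);
        g_thread_muted = true;
      }
      ~ScopedMuteThreadSamplesSketch() { g_thread_muted = false; }
      static bool IsMuted() { return g_thread_muted; }
    };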
@@ -11,14 +11,25 @@
 #include "base/allocator/allocator_shim.h"
 #include "base/allocator/buildflags.h"
 #include "base/allocator/partition_allocator/partition_alloc.h"
+#include "base/bind.h"
 #include "base/debug/stack_trace.h"
 #include "base/macros.h"
 #include "base/no_destructor.h"
 #include "base/partition_alloc_buildflags.h"
 #include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
+#include "base/threading/thread_id_name_manager.h"
 #include "base/threading/thread_local_storage.h"
+#include "base/trace_event/heap_profiler_allocation_context_tracker.h"
 #include "build/build_config.h"
 
+#if defined(OS_MACOSX)
+#include <pthread.h>
+#endif
+
+#if defined(OS_LINUX) || defined(OS_ANDROID)
+#include <sys/prctl.h>
+#endif
+
 #if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
     defined(OFFICIAL_BUILD)
 #include "base/trace_event/cfi_backtrace_android.h"
@@ -26,6 +37,54 @@
 namespace base {
 
+constexpr uint32_t kMaxStackEntries = 256;
+
+namespace {
+
+// If a thread name has been set from ThreadIdNameManager, use that. Otherwise,
+// get the thread name from the kernel if available, or fall back to a string
+// containing the thread id. This function intentionally leaks the allocated
+// strings, since they are used to tag allocations even after the thread dies.
+const char* GetAndLeakThreadName() {
+  const char* thread_name =
+      base::ThreadIdNameManager::GetInstance()->GetNameForCurrentThread();
+  if (thread_name && *thread_name != '\0')
+    return thread_name;
+
+  // prctl requires 16 bytes, snprintf requires 19, pthread_getname_np requires
+  // 64 on macOS. See PlatformThread::SetName in platform_thread_mac.mm.
+  constexpr size_t kBufferLen = 64;
+  char name[kBufferLen];
+#if defined(OS_LINUX) || defined(OS_ANDROID)
+  // If the thread name is not set, try to get it from prctl. The thread name
+  // might not be set in cases where the thread started before heap profiling
+  // was enabled.
+  int err = prctl(PR_GET_NAME, name);
+  if (!err)
+    return strdup(name);
+#elif defined(OS_MACOSX)
+  int err = pthread_getname_np(pthread_self(), name, kBufferLen);
+  if (err == 0 && *name != '\0')
+    return strdup(name);
+#endif  // defined(OS_LINUX) || defined(OS_ANDROID)
+
+  // Use the tid if we don't have a thread name.
+  snprintf(name, sizeof(name), "Thread %lu",
+           static_cast<unsigned long>(base::PlatformThread::CurrentId()));
+  return strdup(name);
+}
+
+const char* UpdateAndGetThreadName(const char* name) {
+  static thread_local const char* thread_name;
+  if (name)
+    thread_name = name;
+  if (!thread_name)
+    thread_name = GetAndLeakThreadName();
+  return thread_name;
+}
+
+}  // namespace
+
 SamplingHeapProfiler::Sample::Sample(size_t size,
                                      size_t total,
                                      uint32_t ordinal)
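
The two helpers above resolve a thread's name once and cache the intentionally
leaked pointer in a function-local thread_local, so later allocations on the
same thread tag samples without re-querying the kernel. A standalone sketch of
that caching pattern, assuming a Linux/glibc toolchain where
pthread_getname_np and pthread_setname_np are available:

    #include <pthread.h>
    #include <cstdio>
    #include <cstring>

    const char* CachedThreadNameSketch() {
      static thread_local const char* cached = nullptr;
      if (!cached) {
        char buf[64];  // pthread_getname_np may need up to 64 bytes on macOS.
        if (pthread_getname_np(pthread_self(), buf, sizeof(buf)) == 0 &&
            buf[0] != '\0') {
          cached = strdup(buf);  // Leaked on purpose, like the helper above.
        } else {
          cached = "unnamed";
        }
      }
      return cached;
    }

    int main() {
      pthread_setname_np(pthread_self(), "demo-main");  // Linux signature.
      std::printf("%s\n", CachedThreadNameSketch());    // Prints "demo-main".
      return 0;
    }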
@@ -58,54 +117,139 @@ void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
   PoissonAllocationSampler::Get()->SetSamplingInterval(sampling_interval);
 }
 
-namespace {
-
-void RecordStackTrace(SamplingHeapProfiler::Sample* sample) {
-#if !defined(OS_NACL)
-  constexpr uint32_t kMaxStackEntries = 256;
-  constexpr uint32_t kSkipProfilerOwnFrames = 2;
-  uint32_t skip_frames = kSkipProfilerOwnFrames;
+void SamplingHeapProfiler::SetRecordThreadNames(bool value) {
+  record_thread_names_ = value;
+  if (value) {
+    base::ThreadIdNameManager::GetInstance()->InstallSetNameCallback(
+        base::BindRepeating(IgnoreResult(&UpdateAndGetThreadName)));
+  }
+}
+
+// static
+const char* SamplingHeapProfiler::CachedThreadName() {
+  return UpdateAndGetThreadName(nullptr);
+}
+
+// static
+void** SamplingHeapProfiler::CaptureStackTrace(void** frames,
+                                               size_t max_entries,
+                                               size_t* count) {
+  // Skip the five topmost frames, which belong to the profiler itself, e.g.:
+  //   base::debug::CollectStackTrace
+  //   heap_profiling::CaptureStackTrace
+  //   heap_profiling::RecordAndSendAlloc
+  //   SamplingProfilerWrapper::SampleAdded
+  //   sampling_heap_profiler::PoissonAllocationSampler::DoRecordAlloc
+  size_t skip_frames = 5;
 #if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
     defined(OFFICIAL_BUILD)
-  const void* frames[kMaxStackEntries];
   size_t frame_count =
-      trace_event::CFIBacktraceAndroid::GetInitializedInstance()->Unwind(
-          frames, kMaxStackEntries);
+      base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()->Unwind(
+          const_cast<const void**>(frames), max_entries);
 #elif BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
-  const void* frames[kMaxStackEntries];
-  size_t frame_count =
-      debug::TraceStackFramePointers(frames, kMaxStackEntries, skip_frames);
+  size_t frame_count = base::debug::TraceStackFramePointers(
+      const_cast<const void**>(frames), max_entries, skip_frames);
   skip_frames = 0;
 #else
-  // Fall-back to capturing the stack with debug::StackTrace,
+  // Fall back to capturing the stack with base::debug::CollectStackTrace,
   // which is likely slower, but more reliable.
-  debug::StackTrace stack_trace(kMaxStackEntries);
-  size_t frame_count = 0;
-  const void* const* frames = stack_trace.Addresses(&frame_count);
+  size_t frame_count =
+      base::debug::CollectStackTrace(const_cast<void**>(frames), max_entries);
 #endif
-  sample->stack.insert(
-      sample->stack.end(), const_cast<void**>(&frames[skip_frames]),
-      const_cast<void**>(&frames[std::max<size_t>(frame_count, skip_frames)]));
-#endif
+  skip_frames = std::min(skip_frames, frame_count);
+  *count = frame_count - skip_frames;
+  return frames + skip_frames;
 }
 
-}  // namespace
-
-void SamplingHeapProfiler::SampleAdded(void* address,
-                                       size_t size,
-                                       size_t total,
-                                       PoissonAllocationSampler::AllocatorType,
-                                       const char*) {
+void SamplingHeapProfiler::SampleAdded(
+    void* address,
+    size_t size,
+    size_t total,
+    PoissonAllocationSampler::AllocatorType type,
+    const char* context) {
+  DCHECK(PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted());
   AutoLock lock(mutex_);
   Sample sample(size, total, ++last_sample_ordinal_);
-  RecordStackTrace(&sample);
+  sample.allocator = type;
+  using CaptureMode = trace_event::AllocationContextTracker::CaptureMode;
+  CaptureMode capture_mode =
+      trace_event::AllocationContextTracker::capture_mode();
+  if (capture_mode == CaptureMode::PSEUDO_STACK ||
+      capture_mode == CaptureMode::MIXED_STACK) {
+    CaptureMixedStack(context, &sample);
+  } else {
+    CaptureNativeStack(context, &sample);
+  }
+  RecordString(sample.context);
   samples_.emplace(address, std::move(sample));
 }
+void SamplingHeapProfiler::CaptureMixedStack(const char* context,
+                                             Sample* sample) {
+  // The allocation context is tracked in TLS. Do nothing if TLS has been
+  // destroyed.
+  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
+    return;
+
+  auto* tracker =
+      trace_event::AllocationContextTracker::GetInstanceForCurrentThread();
+  if (!tracker)
+    return;
+
+  trace_event::AllocationContext allocation_context;
+  if (!tracker->GetContextSnapshot(&allocation_context))
+    return;
+
+  const base::trace_event::Backtrace& backtrace = allocation_context.backtrace;
+  CHECK_LE(backtrace.frame_count, kMaxStackEntries);
+  std::vector<void*> stack;
+  stack.reserve(backtrace.frame_count);
+  for (int i = base::checked_cast<int>(backtrace.frame_count) - 1; i >= 0;
+       --i) {
+    const base::trace_event::StackFrame& frame = backtrace.frames[i];
+    if (frame.type != base::trace_event::StackFrame::Type::PROGRAM_COUNTER)
+      RecordString(static_cast<const char*>(frame.value));
+    stack.push_back(const_cast<void*>(frame.value));
+  }
+  sample->stack = std::move(stack);
+
+  if (!context)
+    context = allocation_context.type_name;
+  sample->context = context;
+}
+void SamplingHeapProfiler::CaptureNativeStack(const char* context,
+                                              Sample* sample) {
+  void* stack[kMaxStackEntries];
+  size_t frame_count;
+  // One frame is reserved for the thread name.
+  void** first_frame =
+      CaptureStackTrace(stack, kMaxStackEntries - 1, &frame_count);
+  DCHECK_LT(frame_count, kMaxStackEntries);
+  sample->stack.assign(first_frame, first_frame + frame_count);
+
+  if (record_thread_names_)
+    sample->thread_name = CachedThreadName();
+
+  // The task context requires access to TLS.
+  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
+    return;
+
+  if (!context) {
+    const auto* tracker =
+        trace_event::AllocationContextTracker::GetInstanceForCurrentThread();
+    if (tracker)
+      context = tracker->TaskContext();
+  }
+  sample->context = context;
+}
+const char* SamplingHeapProfiler::RecordString(const char* string) {
+  return string ? *strings_.insert(string).first : nullptr;
+}
 void SamplingHeapProfiler::SampleRemoved(void* address) {
-  AutoLock lock(mutex_);
-  auto it = samples_.find(address);
-  if (it != samples_.end())
-    samples_.erase(it);
+  DCHECK(base::PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted());
+  base::AutoLock lock(mutex_);
+  samples_.erase(address);
 }
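
A note on the frame trimming in CaptureStackTrace above: skip_frames is clamped
before the subtraction, so a capture shorter than the five profiler-internal
frames cannot make the size_t count wrap around. The arithmetic in isolation,
as a toy model rather than Chromium code:

    #include <algorithm>
    #include <cstddef>

    // Returns a pointer past the skipped profiler-internal frames and writes
    // the number of remaining frames to |count|. Clamping keeps the unsigned
    // subtraction from wrapping when frame_count < skip_frames.
    void** TrimProfilerFrames(void** frames, std::size_t frame_count,
                              std::size_t skip_frames, std::size_t* count) {
      skip_frames = std::min(skip_frames, frame_count);
      *count = frame_count - skip_frames;
      return frames + skip_frames;
    }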
@@ -125,6 +269,12 @@ std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
   return samples;
 }
 
+std::vector<const char*> SamplingHeapProfiler::GetStrings() {
+  PoissonAllocationSampler::ScopedMuteThreadSamples no_samples_scope;
+  AutoLock lock(mutex_);
+  return std::vector<const char*>(strings_.begin(), strings_.end());
+}
+
 // static
 void SamplingHeapProfiler::Init() {
   PoissonAllocationSampler::Init();
......
@@ -6,6 +6,7 @@
 #define BASE_SAMPLING_HEAP_PROFILER_SAMPLING_HEAP_PROFILER_H_
 
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 
 #include "base/base_export.h"
@@ -30,24 +31,66 @@ class BASE_EXPORT SamplingHeapProfiler
     Sample(const Sample&);
     ~Sample();
 
-    size_t size;   // Allocation size.
-    size_t total;  // Total size attributed to the sample.
+    // Allocation size.
+    size_t size;
+    // Total size attributed to the sample.
+    size_t total;
+    // Type of the allocator.
+    PoissonAllocationSampler::AllocatorType allocator;
+    // Context as provided by the allocation hook.
+    const char* context = nullptr;
+    // Name of the thread that made the sampled allocation.
+    const char* thread_name = nullptr;
+    // Call stack of PC addresses responsible for the allocation.
+    // If AllocationContextTracker::capture_mode() is PSEUDO or MIXED, the
+    // frame pointers may point to name strings instead of PCs. In that case
+    // all the string pointers are also reported via the |GetStrings| method
+    // of |SamplingHeapProfiler|, so they can be distinguished from the PC
+    // pointers.
     std::vector<void*> stack;
 
    private:
     friend class SamplingHeapProfiler;
 
-    Sample(size_t, size_t total, uint32_t ordinal);
+    Sample(size_t size, size_t total, uint32_t ordinal);
 
     uint32_t ordinal;
   };
 
+  // Starts collecting allocation samples. Returns the current profile_id.
+  // This value can then be passed to |GetSamples| to retrieve only the samples
+  // recorded since the corresponding |Start| invocation.
   uint32_t Start();
+
+  // Stops recording allocation samples.
   void Stop();
+
+  // Sets the sampling interval in bytes.
   void SetSamplingInterval(size_t sampling_interval);
+
+  // Enables recording the name of the thread that made each sampled
+  // allocation.
+  void SetRecordThreadNames(bool value);
+
+  // Returns the current thread name.
+  static const char* CachedThreadName();
+
+  // Returns the samples recorded for the profiling session.
+  // If |profile_id| is set to the value returned by the |Start| method,
+  // only the samples recorded after the corresponding |Start| invocation are
+  // returned. To retrieve all the collected samples, |profile_id| must be
+  // set to 0.
   std::vector<Sample> GetSamples(uint32_t profile_id);
+
+  // Returns the list of strings used in the profile call stacks.
+  std::vector<const char*> GetStrings();
+
+  // Captures up to |max_entries| stack frames into the buffer pointed to by
+  // |frames|. Puts the number of captured frames into the |count| output
+  // parameter. Returns a pointer to the topmost frame.
+  static void** CaptureStackTrace(void** frames,
+                                  size_t max_entries,
+                                  size_t* count);
 
   static void Init();
   static SamplingHeapProfiler* Get();
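
The |stack| comment above implies a two-step decode for consumers when PSEUDO
or MIXED mode is active: fetch the string set once, then classify each frame
pointer. A hedged sketch of such a consumer; the function is hypothetical and
builds only inside Chromium:

    #include <unordered_set>

    void ClassifyFramesSketch() {
      auto* profiler = base::SamplingHeapProfiler::Get();
      std::unordered_set<const void*> known_strings;
      for (const char* s : profiler->GetStrings())
        known_strings.insert(s);
      for (const auto& sample : profiler->GetSamples(/*profile_id=*/0)) {
        for (void* frame : sample.stack) {
          // A hit means |frame| is a trace-event name string; otherwise it
          // is a native PC that still needs symbolization.
          bool is_name = known_strings.count(frame) != 0;
          (void)is_name;
        }
      }
    }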
@@ -63,10 +106,30 @@ class BASE_EXPORT SamplingHeapProfiler
                    const char* context) override;
   void SampleRemoved(void* address) override;
 
+  void CaptureMixedStack(const char* context, Sample* sample);
+  void CaptureNativeStack(const char* context, Sample* sample);
+  const char* RecordString(const char* string);
+
+  // Mutex guarding access to |samples_| and |strings_|.
   Lock mutex_;
+
+  // Samples of the currently live allocations.
   std::unordered_map<void*, Sample> samples_;
+
+  // When CaptureMode::PSEUDO_STACK or CaptureMode::MIXED_STACK is enabled,
+  // the call stacks of samples may contain strings besides PC addresses.
+  // In that case each string pointer is also added to the |strings_| set.
+  // The set only contains pointers to static strings that are never deleted.
+  std::unordered_set<const char*> strings_;
+
+  // Ordinal of the last sample, used to mark samples recorded during a
+  // single session.
   uint32_t last_sample_ordinal_ = 1;
+
+  // Whether to record thread names.
+  bool record_thread_names_ = false;
 
   friend class NoDestructor<SamplingHeapProfiler>;
 
   DISALLOW_COPY_AND_ASSIGN(SamplingHeapProfiler);
......
@@ -20,7 +20,6 @@
 namespace heap_profiling {
 class ScopedAllowAlloc;
-class ScopedAllowRealloc;
 }  // namespace heap_profiling
 
 namespace ui {
@@ -29,6 +28,8 @@ class TLSDestructionCheckerForX11;
 namespace base {
 
+class SamplingHeapProfiler;
+
 namespace debug {
 class GlobalActivityTracker;
 }  // namespace debug
@@ -160,11 +161,11 @@ class BASE_EXPORT ThreadLocalStorage {
   // thread destruction. Attempting to call Slot::Get() during destruction is
   // disallowed and will hit a DCHECK. Any code that relies on TLS during thread
   // destruction must first check this method before calling Slot::Get().
+  friend class base::SamplingHeapProfiler;
   friend class base::internal::ThreadLocalStorageTestInternal;
   friend class base::trace_event::MallocDumpProvider;
   friend class debug::GlobalActivityTracker;
   friend class heap_profiling::ScopedAllowAlloc;
-  friend class heap_profiling::ScopedAllowRealloc;
   friend class ui::TLSDestructionCheckerForX11;
   static bool HasBeenDestroyed();
......
@@ -5,17 +5,15 @@
 #ifndef COMPONENTS_SERVICES_HEAP_PROFILING_PUBLIC_CPP_SAMPLING_PROFILER_WRAPPER_H_
 #define COMPONENTS_SERVICES_HEAP_PROFILING_PUBLIC_CPP_SAMPLING_PROFILER_WRAPPER_H_
 
-#include <unordered_map>
-#include <unordered_set>
 #include <vector>
 
 #include "base/sampling_heap_profiler/poisson_allocation_sampler.h"
-#include "components/services/heap_profiling/public/cpp/sender_pipe.h"
-#include "components/services/heap_profiling/public/cpp/stream.h"
 #include "components/services/heap_profiling/public/mojom/heap_profiling_client.mojom.h"
 
 namespace heap_profiling {
 
+class SenderPipe;
+
 // Initializes the TLS slot globally. This will be called early in Chrome's
 // lifecycle to prevent re-entrancy from occurring while trying to set up the
 // TLS slot, which is the entity that's supposed to prevent re-entrancy.
@@ -34,9 +32,6 @@ bool SetOnInitAllocatorShimCallbackForTesting(
 class SamplingProfilerWrapper
     : private base::PoissonAllocationSampler::SamplesObserver {
  public:
-  SamplingProfilerWrapper();
-  ~SamplingProfilerWrapper() override;
-
   void StartProfiling(SenderPipe* sender_pipe,
                       mojom::ProfilingParamsPtr params);
   void StopProfiling();
@@ -51,21 +46,6 @@ class SamplingProfilerWrapper
   mojom::HeapProfilePtr RetrieveHeapProfile();
 
  private:
-  struct Sample {
-    Sample();
-    Sample(Sample&& sample);
-    ~Sample();
-    Sample& operator=(Sample&&) = default;
-
-    AllocatorType allocator;
-    size_t size;
-    const char* context = nullptr;
-    std::vector<uint64_t> stack;
-
-    DISALLOW_COPY_AND_ASSIGN(Sample);
-  };
-
   // base::PoissonAllocationSampler::SamplesObserver
   void SampleAdded(void* address,
                    size_t size,
@@ -74,25 +54,7 @@ class SamplingProfilerWrapper
                    const char* context) override;
   void SampleRemoved(void* address) override;
 
-  void CaptureMixedStack(const char* context, Sample* sample);
-  void CaptureNativeStack(const char* context, Sample* sample);
-  const char* RecordString(const char* string);
-
   bool stream_samples_ = false;
-
-  // Mutex to access |samples_| and |strings_|.
-  base::Lock mutex_;
-
-  // Samples of the currently live allocations.
-  std::unordered_map<void*, Sample> samples_;
-
-  // When CaptureMode::PSEUDO_STACK or CaptureMode::MIXED_STACK is enabled
-  // the call stack contents of samples may contain strings besides
-  // PC addresses.
-  // In this case each string pointer is also added to the |strings_| set.
-  // The set does only contain pointers to static strings that are never
-  // deleted.
-  std::unordered_set<const char*> strings_;
 };
 
 }  // namespace heap_profiling
......