Commit 3de60857 authored by dskiba's avatar dskiba Committed by Commit Bot

[tracing] Optimize StackFrameDeduplicator.

Profiling StackFrameDeduplicator with new heap format revealed that:

1. At least 60% of all insertions are hits, i.e. they insert backtraces
   that already exist.
2. There can be as many as 500K frames.
3. The average child count at each level is ~1.2.

This CL optimizes StackFrameDeduplicator based on that data:

1. Adds a hash-based backtrace lookup to cut down number of lookups
   in FrameNode::children maps.
2. Changes FrameNode storage from std::vector to std::deque, avoiding
   costly vector reallocations.
3. Changes FrameNode::children storage from std::map to base::flat_map.

These changes improve HeapProfilerPerfTest.DeduplicateStackFrames perftest
~1.6x on macOS (1040ms -> 620ms), but real-world impact should be higher
because the perftest is pessimistic.

BUG=739378

Review-Url: https://codereview.chromium.org/2977783002
Cr-Commit-Position: refs/heads/master@{#487015}
parent eb12aa53
......@@ -7,9 +7,11 @@
#include <inttypes.h>
#include <stddef.h>
#include <algorithm>
#include <string>
#include <utility>
#include "base/hash.h"
#include "base/strings/stringprintf.h"
#include "base/trace_event/heap_profiler_string_deduplicator.h"
#include "base/trace_event/memory_usage_estimator.h"
......@@ -20,6 +22,20 @@
namespace base {
namespace trace_event {
namespace {
// Simple additive hash over the raw frame pointers. Despite its simplicity
// it produces ~0 collisions on real backtraces; any collision that does
// occur is detected later by Match() before the cached index is reused.
size_t HashBacktrace(const StackFrame* begin, const StackFrame* end) {
  size_t hash = 0;
  while (begin != end) {
    hash += reinterpret_cast<uintptr_t>(begin->value);
    ++begin;
  }
  return hash;
}
} // namespace
// Creates a node for |frame| whose caller is the node at |parent_frame_index|
// in |frames_| (FrameNode::kInvalidFrameIndex when |frame| is a root frame).
StackFrameDeduplicator::FrameNode::FrameNode(StackFrame frame,
int parent_frame_index)
: frame(frame), parent_frame_index(parent_frame_index) {}
......@@ -39,18 +55,55 @@ StackFrameDeduplicator::StackFrameDeduplicator(
}
StackFrameDeduplicator::~StackFrameDeduplicator() {}
int StackFrameDeduplicator::Insert(const StackFrame* beginFrame,
const StackFrame* endFrame) {
if (beginFrame == endFrame) {
// Checks that the stored backtrace whose bottom frame is |frame_index| is
// exactly equal to the backtrace [begin_frame, end_frame). Used to confirm
// hits in |backtrace_lookup_table_|, which stores only hashes, so hash
// collisions must be ruled out here.
bool StackFrameDeduplicator::Match(int frame_index,
                                   const StackFrame* begin_frame,
                                   const StackFrame* end_frame) const {
  // |frame_index| identifies the bottom frame, i.e. we need to walk the
  // backtrace backwards. Decrement inside the loop so the cursor never
  // moves before |begin_frame| — computing a pointer that precedes the
  // start of the caller's array is undefined behavior.
  const StackFrame* current_frame = end_frame;
  while (current_frame != begin_frame) {
    --current_frame;
    const FrameNode& node = frames_[frame_index];
    if (node.frame != *current_frame) {
      return false;
    }
    frame_index = node.parent_frame_index;
    if (frame_index == FrameNode::kInvalidFrameIndex) {
      // Reached the root of the stored chain; it is a full match only if
      // the input backtrace is exhausted at the same time.
      return current_frame == begin_frame;
    }
  }
  // Input backtrace exhausted but the stored chain has more parent frames,
  // i.e. the stored backtrace is longer — no match. (Also covers the
  // begin_frame == end_frame case, though Insert() filters that out.)
  return false;
}
int StackFrameDeduplicator::Insert(const StackFrame* begin_frame,
const StackFrame* end_frame) {
if (begin_frame == end_frame) {
// Empty backtraces are mapped to id 0.
return 0;
}
size_t backtrace_hash = HashBacktrace(begin_frame, end_frame);
// Check if we know about this backtrace.
auto backtrace_it = backtrace_lookup_table_.find(backtrace_hash);
if (backtrace_it != backtrace_lookup_table_.end()) {
int backtrace_index = backtrace_it->second;
if (Match(backtrace_index, begin_frame, end_frame)) {
return backtrace_index;
}
}
int frame_index = FrameNode::kInvalidFrameIndex;
std::map<StackFrame, int>* nodes = &roots_;
base::flat_map<StackFrame, int>* nodes = &roots_;
// Loop through the frames, early out when a frame is null.
for (const StackFrame* it = beginFrame; it != endFrame; it++) {
for (const StackFrame* it = begin_frame; it != end_frame; it++) {
StackFrame frame = *it;
auto node = nodes->find(frame);
......@@ -77,6 +130,9 @@ int StackFrameDeduplicator::Insert(const StackFrame* beginFrame,
nodes = &frames_[frame_index].children;
}
// Remember the backtrace.
backtrace_lookup_table_[backtrace_hash] = frame_index;
return frame_index;
}
......@@ -123,8 +179,9 @@ void StackFrameDeduplicator::SerializeIncrementally(TracedValue* traced_value) {
void StackFrameDeduplicator::EstimateTraceMemoryOverhead(
TraceEventMemoryOverhead* overhead) {
size_t memory_usage =
EstimateMemoryUsage(frames_) + EstimateMemoryUsage(roots_);
size_t memory_usage = EstimateMemoryUsage(frames_) +
EstimateMemoryUsage(roots_) +
EstimateMemoryUsage(backtrace_lookup_table_);
overhead->Add(TraceEventMemoryOverhead::kHeapProfilerStackFrameDeduplicator,
sizeof(StackFrameDeduplicator) + memory_usage);
}
......
......@@ -5,11 +5,12 @@
#ifndef BASE_TRACE_EVENT_HEAP_PROFILER_STACK_FRAME_DEDUPLICATOR_H_
#define BASE_TRACE_EVENT_HEAP_PROFILER_STACK_FRAME_DEDUPLICATOR_H_
#include <map>
#include <deque>
#include <string>
#include <vector>
#include <unordered_map>
#include "base/base_export.h"
#include "base/containers/flat_map.h"
#include "base/macros.h"
#include "base/trace_event/heap_profiler_allocation_context.h"
......@@ -45,10 +46,10 @@ class BASE_EXPORT StackFrameDeduplicator {
constexpr static int kInvalidFrameIndex = -1;
// Indices into |frames_| of frames called from the current frame.
std::map<StackFrame, int> children;
base::flat_map<StackFrame, int> children;
};
using ConstIterator = std::vector<FrameNode>::const_iterator;
using ConstIterator = std::deque<FrameNode>::const_iterator;
// |string_deduplication| is used during serialization, and is expected
// to outlive instances of this class.
......@@ -73,12 +74,23 @@ class BASE_EXPORT StackFrameDeduplicator {
void EstimateTraceMemoryOverhead(TraceEventMemoryOverhead* overhead);
private:
// Checks that existing backtrace identified by |frame_index| equals
// to the one identified by |begin_frame|, |end_frame|.
bool Match(int frame_index,
const StackFrame* begin_frame,
const StackFrame* end_frame) const;
StringDeduplicator* string_deduplicator_;
std::map<StackFrame, int> roots_;
std::vector<FrameNode> frames_;
base::flat_map<StackFrame, int> roots_;
std::deque<FrameNode> frames_;
size_t last_exported_index_;
// {backtrace_hash -> frame_index} map for finding backtraces that are
// already added. Backtraces themselves are not stored in the map, instead
// Match() is used on the found frame_index to detect collisions.
std::unordered_map<size_t, int> backtrace_lookup_table_;
DISALLOW_COPY_AND_ASSIGN(StackFrameDeduplicator);
};
......
......@@ -106,7 +106,7 @@ TEST(StackFrameDeduplicatorTest, ImplicitId0) {
StackFrameDeduplicator dedup(&string_dedup);
// Node #0 is added implicitly and corresponds to an empty backtrace.
ASSERT_EQ(dedup.begin() + 1, dedup.end());
ASSERT_TRUE(dedup.begin() + 1 == dedup.end());
ASSERT_EQ(0, dedup.Insert(std::begin(null_bt), std::begin(null_bt)));
// Placeholder entry for ID 0 is a frame with NULL name and no parent.
......@@ -138,7 +138,7 @@ TEST(StackFrameDeduplicatorTest, SingleBacktrace) {
ASSERT_EQ(kMalloc, (iter + 2)->frame);
ASSERT_EQ(2, (iter + 2)->parent_frame_index);
ASSERT_EQ(iter + 3, dedup.end());
ASSERT_TRUE(iter + 3 == dedup.end());
}
TEST(StackFrameDeduplicatorTest, SingleBacktraceWithNull) {
......@@ -168,7 +168,7 @@ TEST(StackFrameDeduplicatorTest, SingleBacktraceWithNull) {
ASSERT_EQ(kMalloc, (iter + 2)->frame);
ASSERT_EQ(2, (iter + 2)->parent_frame_index);
ASSERT_EQ(iter + 3, dedup.end());
ASSERT_TRUE(iter + 3 == dedup.end());
}
// Test that there can be different call trees (there can be multiple bottom
......@@ -206,7 +206,7 @@ TEST(StackFrameDeduplicatorTest, MultipleRoots) {
ASSERT_EQ(kCreateWidget, (iter + 3)->frame);
ASSERT_EQ(3, (iter + 3)->parent_frame_index);
ASSERT_EQ(iter + 4, dedup.end());
ASSERT_TRUE(iter + 4 == dedup.end());
}
TEST(StackFrameDeduplicatorTest, Deduplication) {
......@@ -236,7 +236,7 @@ TEST(StackFrameDeduplicatorTest, Deduplication) {
ASSERT_EQ(kInitialize, (iter + 2)->frame);
ASSERT_EQ(1, (iter + 2)->parent_frame_index);
ASSERT_EQ(iter + 3, dedup.end());
ASSERT_TRUE(iter + 3 == dedup.end());
// Inserting the same backtrace again should return the index of the existing
// node.
......
......@@ -22,6 +22,8 @@
#include <vector>
#include "base/base_export.h"
#include "base/containers/flat_map.h"
#include "base/containers/flat_set.h"
#include "base/containers/linked_list.h"
#include "base/strings/string16.h"
......@@ -150,6 +152,12 @@ size_t EstimateMemoryUsage(const std::priority_queue<T, C>& queue);
template <class T, class C>
size_t EstimateMemoryUsage(const std::stack<T, C>& stack);
template <class T, class C>
size_t EstimateMemoryUsage(const base::flat_set<T, C>& set);
template <class K, class V, class C>
size_t EstimateMemoryUsage(const base::flat_map<K, V, C>& map);
// TODO(dskiba):
// std::forward_list
......@@ -542,6 +550,20 @@ size_t EstimateMemoryUsage(const std::stack<T, C>& stack) {
return EstimateMemoryUsage(internal::GetUnderlyingContainer(stack));
}
// Flat containers
// Estimates memory used by a base::flat_set: the contiguous buffer it has
// reserved (capacity() elements, allocated whether occupied or not) plus
// memory owned by the elements themselves via EstimateIterableMemoryUsage.
template <class T, class C>
size_t EstimateMemoryUsage(const base::flat_set<T, C>& set) {
using value_type = typename base::flat_set<T, C>::value_type;
return sizeof(value_type) * set.capacity() + EstimateIterableMemoryUsage(set);
}
// Estimates memory used by a base::flat_map: the contiguous key/value buffer
// it has reserved (capacity() entries) plus memory owned by the stored pairs.
template <class K, class V, class C>
size_t EstimateMemoryUsage(const base::flat_map<K, V, C>& map) {
  using value_type = typename base::flat_map<K, V, C>::value_type;
  const size_t buffer_size = sizeof(value_type) * map.capacity();
  return buffer_size + EstimateIterableMemoryUsage(map);
}
} // namespace trace_event
} // namespace base
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment