Commit 32fac919 authored by Stephen Nusko, committed by Commit Bot

Improve InterningIndex performance by switching to std::array.

Instead of MRUCache, which uses a sorted map under the hood, use two
std::arrays: one stores all the keys (very cache friendly when scanning
them), and the matching index then jumps straight to the value in the
other array.

This leads to a ~9% speedup (5% to 13% at 95% confidence). See the
"two_arrays tracing/tipOfTreeTracing" sheet in
https://docs.google.com/spreadsheets/d/1HIiyUQa7d8eLfMBBBm_7V60kqNABYoFbHpKdpwnYu78/

Total tracing overhead is ~59.8% (52% to 67% at 95% confidence). See the
"two_arrays tracing/tipOfTreeNotTracing" sheet in the doc above.

Bug: 1007611
Change-Id: Ie2584e0e3a13e34097021fd128387cfa4fabaeed
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1844822
Commit-Queue: Stephen Nusko <nuskos@chromium.org>
Reviewed-by: Stephen Nusko <nuskos@chromium.org>
Reviewed-by: Eric Seckler <eseckler@chromium.org>
Auto-Submit: Stephen Nusko <nuskos@chromium.org>
Cr-Commit-Position: refs/heads/master@{#703377}
parent 514fc11e
@@ -5,11 +5,11 @@
#ifndef SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_
#define SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_
#include <array>
#include <cstdint>
#include <tuple>
#include "base/component_export.h"
#include "base/containers/mru_cache.h"
namespace tracing {
@@ -17,7 +17,7 @@ namespace tracing {
using InterningID = uint32_t;
struct COMPONENT_EXPORT(TRACING_CPP) InterningIndexEntry {
InterningID id;
InterningID id = 0;
// Whether the entry was emitted since the last reset of emitted state. If
// |false|, the sink should (re)emit the entry in the current TracePacket.
@@ -25,29 +25,80 @@ struct COMPONENT_EXPORT(TRACING_CPP) InterningIndexEntry {
// We don't remove entries on reset of emitted state, so that we can continue
// to use their original IDs and avoid unnecessarily incrementing the ID
// counter.
bool was_emitted;
bool was_emitted = false;
};
// Interning index that associates interned values with interning IDs. It can
// track entries of different types within the same ID space, e.g. so that both
// copied strings and pointers to static strings can co-exist in the same index.
// Uses base::MRUCaches to track the ID associations while enforcing an upper
// bound on the index size.
template <typename... ValueTypes>
//
// The index will cache up to |N| values per ValueType, after which it will
// start replacing them on a FIFO basis. N must be a power of 2 for
// performance reasons.
template <size_t N, typename... ValueTypes>
class COMPONENT_EXPORT(TRACING_CPP) InterningIndex {
public:
// IndexCache is just a pair of std::arrays which are kept in sync with each
// other. This has an advantage over a single std::array of pairs, since we can
// load a batch of keys to scan in one cache line without loading the values.
template <typename ValueType>
using IndexCache = base::MRUCache<ValueType, InterningIndexEntry>;
class IndexCache {
public:
IndexCache() {
// Assert that N is a power of two. This allows the "% N" in Insert() to
// compile to a bunch of bit shifts which improves performance over an
// arbitrary modulo division.
static_assert(
N && ((N & (N - 1)) == 0),
"InterningIndex requires that the cache size be a power of 2.");
}
void Clear() {
for (auto& val : keys_) {
val = ValueType{};
}
}
typename std::array<InterningIndexEntry, N>::iterator Find(
const ValueType& value) {
auto it = std::find(keys_.begin(), keys_.end(), value);
// If the find above returns it == keys_.end(), then (it - keys_.begin())
// will equal keys_.size(), which is equal to values_.size(). So
// values_.begin() + values_.size() will be values_.end() and we've
// returned the correct value. This saves us checking a conditional in
// this function.
return values_.begin() + (it - keys_.begin());
}
typename std::array<InterningIndexEntry, N>::iterator Insert(
const ValueType& key,
InterningIndexEntry&& value) {
size_t new_position = current_index_++ % N;
keys_[new_position] = key;
values_[new_position] = std::move(value);
return values_.begin() + new_position;
}
typename std::array<InterningIndexEntry, N>::iterator begin() {
return values_.begin();
}
typename std::array<InterningIndexEntry, N>::iterator end() {
return values_.end();
}
private:
size_t current_index_ = 0;
std::array<ValueType, N> keys_{{}};
std::array<InterningIndexEntry, N> values_{{}};
};
// Construct a new index with caches for each of the ValueTypes. The cache
// size for the n-th ValueType will be limited to max_entry_counts[n] entries.
// Construct a new index with caches for each of the ValueTypes. Every cache
// holds |N| elements.
//
// For example, to construct an index containing at most 1000 char* pointers
// and 100 std::string objects:
// InterningIndex<char*, std::string> index(1000, 100);
template <typename... SizeType>
InterningIndex(SizeType... max_entry_counts)
: entry_caches_(max_entry_counts...) {}
// For example, to construct an index containing at most 1024 char* pointers
// and 1024 std::string objects:
// InterningIndex<1024, char*, std::string> index;
InterningIndex() = default;
// Returns the entry for the given interned |value|, adding it to the index if
// it didn't exist previously or was evicted from the index. Entries may be
@@ -60,14 +111,14 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex {
InterningIndexEntry LookupOrAdd(const ValueType& value) {
IndexCache<ValueType>& cache =
std::get<IndexCache<ValueType>>(entry_caches_);
auto it = cache.Get(value);
auto it = cache.Find(value);
if (it == cache.end()) {
it = cache.Put(value, InterningIndexEntry{next_id_++, false});
it = cache.Insert(value, InterningIndexEntry{next_id_++, false});
}
bool was_emitted = it->second.was_emitted;
bool was_emitted = it->was_emitted;
// The caller will (re)emit the entry, so mark it as emitted.
it->second.was_emitted = true;
return InterningIndexEntry{it->second.id, was_emitted};
it->was_emitted = true;
return InterningIndexEntry{it->id, was_emitted};
}
// Marks all entries as "not emitted", so that they will be reemitted when
@@ -103,7 +154,7 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex {
cache.Clear();
} else {
for (auto& entry : cache) {
entry.second.was_emitted = false;
entry.was_emitted = false;
}
}
}
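For illustration, here is a distilled standalone sketch of the IndexCache
technique above (not Chromium code; names are illustrative): two parallel
std::arrays with FIFO replacement, where the power-of-two capacity lets the
modulo in Insert() reduce to a bitmask.

  #include <algorithm>
  #include <array>
  #include <cstddef>
  #include <utility>

  template <typename Key, typename Value, size_t N>
  class TwoArrayFifoCache {
    static_assert(N && ((N & (N - 1)) == 0), "N must be a power of 2");

   public:
    // Scans only the key array (cache friendly). If the key is absent, the
    // returned iterator equals end() without needing an explicit branch.
    typename std::array<Value, N>::iterator Find(const Key& key) {
      auto it = std::find(keys_.begin(), keys_.end(), key);
      return values_.begin() + (it - keys_.begin());
    }

    // Overwrites the oldest slot once the cache is full (FIFO replacement).
    // With N a power of two, "next_ % N" is the same as "next_ & (N - 1)".
    typename std::array<Value, N>::iterator Insert(const Key& key, Value value) {
      size_t pos = next_++ & (N - 1);
      keys_[pos] = key;
      values_[pos] = std::move(value);
      return values_.begin() + pos;
    }

    typename std::array<Value, N>::iterator end() { return values_.end(); }

   private:
    size_t next_ = 0;
    std::array<Key, N> keys_{};
    std::array<Value, N> values_{};
  };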
@@ -169,12 +169,6 @@ TrackEventThreadLocalEventSink::TrackEventThreadLocalEventSink(
: ThreadLocalEventSink(std::move(trace_writer),
session_id,
disable_interning),
// TODO(eseckler): Tune these values experimentally.
interned_event_categories_(1000),
interned_event_names_(1000, 100),
interned_annotation_names_(1000, 100),
interned_source_locations_(1000),
interned_log_message_bodies_(100),
process_id_(TraceLog::GetInstance()->process_id()),
thread_id_(static_cast<int>(base::PlatformThread::CurrentId())),
privacy_filtering_enabled_(proto_writer_filtering_enabled) {
@@ -69,12 +69,13 @@ class COMPONENT_EXPORT(TRACING_CPP) TrackEventThreadLocalEventSink
// TODO(eseckler): Make it possible to register new indexes for use from
// TRACE_EVENT macros.
InterningIndex<const char*> interned_event_categories_;
InterningIndex<const char*, std::string> interned_event_names_;
InterningIndex<const char*, std::string> interned_annotation_names_;
InterningIndex<std::tuple<const char*, const char*, int>>
// TODO(eseckler): Tune the cache sizes experimentally.
InterningIndex<1024, const char*> interned_event_categories_;
InterningIndex<1024, const char*, std::string> interned_event_names_;
InterningIndex<1024, const char*, std::string> interned_annotation_names_;
InterningIndex<1024, std::tuple<const char*, const char*, int>>
interned_source_locations_;
InterningIndex<std::string> interned_log_message_bodies_;
InterningIndex<128, std::string> interned_log_message_bodies_;
static std::atomic<uint32_t> incremental_state_reset_id_;
@@ -78,14 +78,15 @@ class COMPONENT_EXPORT(TRACING_CPP) TracingSamplerProfiler {
const base::PlatformThreadId sampled_thread_id_;
base::Lock trace_writer_lock_;
std::unique_ptr<perfetto::TraceWriter> trace_writer_;
InterningIndex<size_t> interned_callstacks_{1000};
InterningIndex<std::pair<std::string, std::string>,
InterningIndex<1024, size_t> interned_callstacks_{};
InterningIndex<1024,
std::pair<std::string, std::string>,
std::pair<uintptr_t, std::string>>
interned_frames_{1000, 1000};
InterningIndex<std::string> interned_frame_names_{1000};
InterningIndex<std::string> interned_module_names_{1000};
InterningIndex<std::string> interned_module_ids_{1000};
InterningIndex<uintptr_t> interned_modules_{1000};
interned_frames_{};
InterningIndex<1024, std::string> interned_frame_names_{};
InterningIndex<1024, std::string> interned_module_names_{};
InterningIndex<1024, std::string> interned_module_ids_{};
InterningIndex<1024, uintptr_t> interned_modules_{};
bool reset_incremental_state_ = true;
uint32_t last_incremental_state_reset_id_ = 0;
int32_t last_emitted_process_priority_ = -1;
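The call-site changes above all follow one pattern (member names here are
illustrative): the per-type size arguments previously passed to the
constructor move into a single compile-time size parameter, so the members
become default-constructed.

  // Before: sizes were passed per ValueType at construction time.
  InterningIndex<const char*, std::string> interned_names_{1000, 100};

  // After: one power-of-two size N is fixed for all caches at compile time.
  InterningIndex<1024, const char*, std::string> interned_names_;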