Commit 63c34897 authored by Stephen Nusko's avatar Stephen Nusko Committed by Commit Bot

Reintroduce InterningIndex using std::array rather then MRUCache.

We also solve two issues that we saw:

1) Increased memory usage, this was caused because the original try used a single
   value for all ValueTypes which increased empty strings even if not used. We
   address this with allowing every ValueType to have its own value. Some memory
   increase is expected (but won't be the +7% we saw) and is only while tracing
   is active. We expect to see memory regressions of some small percetange as the
   pinpoint perf below shows.

2) Return only valid indexing iids, originally we searched the whole array but that
   included default initialized values and thus we assigned invalid index iids.
   This is address by keeping track of how much of the array has been used.

This changes CPU overhead by ~2% (-5% to +2% at 95% confidence).

Results here: https://docs.google.com/spreadsheets/d/1SIFHwNexnHHn4AELV0ZktHav_Ni7AtohVycKWWwMNEI/

Google internal:
CPU profile with MRUCache
https://pprof.corp.google.com/user-profile?id=df279687f82a1b1ae910d3805c631a5e&showfrom=.*%5BTt%5Drac%5Bie%5D.*

CPU profile with std::array
https://pprof.corp.google.com/user-profile?id=7adaf0baf6c45b9bc9e00aeabb5e4ced&showfrom=.*%5BTt%5Drac%5Bie%5D.*

Bug: 1012257
Change-Id: I3b627cb0b3c2f814ecf6f2106c32b8b439a2f522
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1849679Reviewed-by: default avatarStephen Nusko <nuskos@chromium.org>
Reviewed-by: default avatarEric Seckler <eseckler@chromium.org>
Commit-Queue: Stephen Nusko <nuskos@chromium.org>
Auto-Submit: Stephen Nusko <nuskos@chromium.org>
Cr-Commit-Position: refs/heads/master@{#708989}
parent 7e0868eb
...@@ -5,11 +5,12 @@ ...@@ -5,11 +5,12 @@
#ifndef SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_ #ifndef SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_
#define SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_ #define SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_
#include <algorithm>
#include <array>
#include <cstdint> #include <cstdint>
#include <tuple> #include <tuple>
#include "base/component_export.h" #include "base/component_export.h"
#include "base/containers/mru_cache.h"
namespace tracing { namespace tracing {
...@@ -28,26 +29,155 @@ struct COMPONENT_EXPORT(TRACING_CPP) InterningIndexEntry { ...@@ -28,26 +29,155 @@ struct COMPONENT_EXPORT(TRACING_CPP) InterningIndexEntry {
bool was_emitted; bool was_emitted;
}; };
// Index is a template that finds the index (stored in |value|) of a type
// |ValueType| inside a |Tuple|. This is done at compile time since this is
// needed inside a std::get<>().
template <typename ValueType, typename Tuple>
struct Index;
template <typename T, typename... Types>
struct Index<T, std::tuple<T, Types...>> {
static const size_t value = 0;
};
template <typename T, typename U, typename... Types>
struct Index<T, std::tuple<U, Types...>> {
static const size_t value = 1 + Index<T, std::tuple<Types...>>::value;
};
// A TypeList holds a list of types, while SizeList holds a list of size_t
// integers.
//
// We can't just use a parameter pack to store these two informations because we
// can only have one parameter pack in a template (it absorbs all the types) and
// must be last. However we want users to be able to specify per type size
// limits to decrease memory usage. One of these lists aren't strictly needed
// (you could just have users enter the numbers (or types) as a parameter pack
// directly), but we use both so that when declaring a InterningIndex to
// declaration is readable and avoids surprising behaviour.
template <typename...>
struct TypeList {};
template <size_t...>
struct SizeList {};
// Interning index that associates interned values with interning IDs. It can // Interning index that associates interned values with interning IDs. It can
// track entries of different types within the same ID space, e.g. so that both // track entries of different types within the same ID space, e.g. so that both
// copied strings and pointers to static strings can co-exist in the same index. // copied strings and pointers to static strings can co-exist in the same index.
// Uses base::MRUCaches to track the ID associations while enforcing an upper //
// bound on the index size. // The index will cache for a given ValueType and Size pair up to |Size| values
template <typename... ValueTypes> // after which it will start replacing them in a FIFO basis. All |Size|s must be
class COMPONENT_EXPORT(TRACING_CPP) InterningIndex { // a power of 2 for performance reasons (this is a compile time error if not).
//
// This definition just declares the type and template. Do not use this this
// directly, instead use the partial specialization below.
template <typename ValueTypes, typename Sizes>
class COMPONENT_EXPORT(TRACING_CPP) InterningIndex;
// Partially specalized so we can have two parameter packs which get nested
// inside their respective lists.
template <typename... ValueTypes, size_t... Sizes>
class InterningIndex<TypeList<ValueTypes...>, SizeList<Sizes...>> {
public: public:
template <typename ValueType> // IndexCache is just a pair of std::arrays which are kept in sync with each
using IndexCache = base::MRUCache<ValueType, InterningIndexEntry>; // other. This has an advantage over a std::array<pairs> since we can load a
// bunch of keys to search in one cache line without loading values.
template <size_t N, typename ValueType>
class IndexCache {
public:
IndexCache() {
// Assert that N is a power of two. This allows the "% N" in Insert() to
// compile to a bunch of bit shifts which improves performance over an
// arbitrary modulo division.
static_assert(
N && ((N & (N - 1)) == 0),
"InterningIndex requires that the cache size be a power of 2.");
}
void Clear() {
for (auto& val : keys_) {
val = ValueType{};
}
valid_index_ = 0;
current_index_ = 0;
}
typename std::array<InterningIndexEntry, N>::iterator Find(
const ValueType& value) {
// First note that only |valid_index_| worth of entries have been assigned
// InterningIDs.
//
// So we search from the beginning up to |valid_index_|.
//
// If valid_index_ <= keys_.size and the element doesn't exist find will
// return it == keys_.begin() + valid_index_. This works because
// IndexCache::end() returns values_.begin() + valid_index_. So this will
// appear to be the IndexCache::end(), which again makes Find() act like
// std::find should. If valid_index_ == keys_.size() then this will be
// values_.end() as well.
//
// By doing this here and not checking the conditional we can only check
// it once and saves us a pretty impactful branch (this find is the
// majority of the time in LookUpOrInsert when profiled.
auto it = std::find(keys_.begin(), keys_.begin() + valid_index_, value);
return values_.begin() + (it - keys_.begin());
}
typename std::array<InterningIndexEntry, N>::iterator Insert(
const ValueType& key,
InterningIndexEntry&& value) {
size_t new_position = current_index_++ % N;
keys_[new_position] = key;
values_[new_position] = std::move(value);
valid_index_ = std::min(valid_index_ + 1, values_.size());
return values_.begin() + new_position;
}
typename std::array<InterningIndexEntry, N>::iterator begin() {
return values_.begin();
}
typename std::array<InterningIndexEntry, N>::iterator end() {
return values_.begin() + valid_index_;
}
private:
size_t valid_index_ = 0;
size_t current_index_ = 0;
std::array<ValueType, N> keys_{{}};
std::array<InterningIndexEntry, N> values_{{}};
};
// Construct a new index with caches for each of the ValueTypes. The cache // Construct a new index with caches for each of the ValueTypes. The cache
// size for the n-th ValueType will be limited to max_entry_counts[n] entries. // size for the n-th ValueType will be limited to n-th element in the Sizes
// list.
// //
// For example, to construct an index containing at most 1000 char* pointers // For example, to construct an index containing at most 1024 char* pointers
// and 100 std::string objects: // and 128 std::string objects:
// InterningIndex<char*, std::string> index(1000, 100); // InterningIndex<TypeList<char*, std::string>, SizeList<1024, 128>>
template <typename... SizeType> // index;
InterningIndex(SizeType... max_entry_counts) InterningIndex() {}
: entry_caches_(max_entry_counts...) {}
// GetCache<ValueType>() Returns the cache for |ValueType| as a reference.
//
// A c++14 helper function.
// This is purely syntactic sugar function that is made possible by c++14.
// Every call to GetCache could be replaced by inlining the std::get below,
// but this hinders readability due to the template meta programing.
//
// auto return type means the compiler will determine the exact type returned,
// using the trailing decltype(auto) return type (which is a c++14 feature)
// allows us to never specify the type while the decltype(auto) rules
// preserves references.
//
// The return type here is an std::array<ValueType, Size>. However, figuring
// out the correct Size for the given ValueType is hard: because each
// ValueType has a unique Size given at compile time, but when we look up a
// Cache we only know which ValueType we want to store. So instead, we let the
// compiler figure out the return type (including the value of Size) based on
// the call to std;:get<>.
template <typename ValueType>
auto GetCache() -> decltype(auto) {
return std::get<Index<ValueType, std::tuple<ValueTypes...>>::value>(
entry_caches_);
}
// Returns the entry for the given interned |value|, adding it to the index if // Returns the entry for the given interned |value|, adding it to the index if
// it didn't exist previously or was evicted from the index. Entries may be // it didn't exist previously or was evicted from the index. Entries may be
...@@ -58,16 +188,16 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex { ...@@ -58,16 +188,16 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex {
// (re)emit the entry in the current TracePacket's InternedData message. // (re)emit the entry in the current TracePacket's InternedData message.
template <typename ValueType> template <typename ValueType>
InterningIndexEntry LookupOrAdd(const ValueType& value) { InterningIndexEntry LookupOrAdd(const ValueType& value) {
IndexCache<ValueType>& cache = auto& cache = GetCache<ValueType>();
std::get<IndexCache<ValueType>>(entry_caches_); auto it = cache.Find(value);
auto it = cache.Get(value);
if (it == cache.end()) { if (it == cache.end()) {
it = cache.Put(value, InterningIndexEntry{next_id_++, false}); it = cache.Insert(value, InterningIndexEntry{next_id_++, false});
} }
bool was_emitted = it->second.was_emitted; DCHECK(it->id != 0);
bool was_emitted = it->was_emitted;
// The caller will (re)emit the entry, so mark it as emitted. // The caller will (re)emit the entry, so mark it as emitted.
it->second.was_emitted = true; it->was_emitted = true;
return InterningIndexEntry{it->second.id, was_emitted}; return InterningIndexEntry{it->id, was_emitted};
} }
// Marks all entries as "not emitted", so that they will be reemitted when // Marks all entries as "not emitted", so that they will be reemitted when
...@@ -98,17 +228,17 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex { ...@@ -98,17 +228,17 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex {
template <typename ValueType> template <typename ValueType>
void ResetStateForValueType(bool clear) { void ResetStateForValueType(bool clear) {
auto& cache = std::get<IndexCache<ValueType>>(entry_caches_); auto& cache = GetCache<ValueType>();
if (clear) { if (clear) {
cache.Clear(); cache.Clear();
} else { } else {
for (auto& entry : cache) { for (auto& entry : cache) {
entry.second.was_emitted = false; entry.was_emitted = false;
} }
} }
} }
std::tuple<IndexCache<ValueTypes>...> entry_caches_; std::tuple<IndexCache<Sizes, ValueTypes>...> entry_caches_;
InterningID next_id_ = 1u; // ID 0 indicates an unset field value. InterningID next_id_ = 1u; // ID 0 indicates an unset field value.
}; };
......
...@@ -169,12 +169,6 @@ TrackEventThreadLocalEventSink::TrackEventThreadLocalEventSink( ...@@ -169,12 +169,6 @@ TrackEventThreadLocalEventSink::TrackEventThreadLocalEventSink(
: ThreadLocalEventSink(std::move(trace_writer), : ThreadLocalEventSink(std::move(trace_writer),
session_id, session_id,
disable_interning), disable_interning),
// TODO(eseckler): Tune these values experimentally.
interned_event_categories_(1000),
interned_event_names_(1000, 100),
interned_annotation_names_(1000, 100),
interned_source_locations_(1000),
interned_log_message_bodies_(100),
process_id_(TraceLog::GetInstance()->process_id()), process_id_(TraceLog::GetInstance()->process_id()),
thread_id_(static_cast<int>(base::PlatformThread::CurrentId())), thread_id_(static_cast<int>(base::PlatformThread::CurrentId())),
privacy_filtering_enabled_(proto_writer_filtering_enabled) { privacy_filtering_enabled_(proto_writer_filtering_enabled) {
......
...@@ -69,12 +69,17 @@ class COMPONENT_EXPORT(TRACING_CPP) TrackEventThreadLocalEventSink ...@@ -69,12 +69,17 @@ class COMPONENT_EXPORT(TRACING_CPP) TrackEventThreadLocalEventSink
// TODO(eseckler): Make it possible to register new indexes for use from // TODO(eseckler): Make it possible to register new indexes for use from
// TRACE_EVENT macros. // TRACE_EVENT macros.
InterningIndex<const char*> interned_event_categories_; InterningIndex<TypeList<const char*>, SizeList<128>>
InterningIndex<const char*, std::string> interned_event_names_; interned_event_categories_;
InterningIndex<const char*, std::string> interned_annotation_names_; InterningIndex<TypeList<const char*, std::string>, SizeList<512, 64>>
InterningIndex<std::tuple<const char*, const char*, int>> interned_event_names_;
InterningIndex<TypeList<const char*, std::string>, SizeList<512, 64>>
interned_annotation_names_;
InterningIndex<TypeList<std::tuple<const char*, const char*, int>>,
SizeList<512>>
interned_source_locations_; interned_source_locations_;
InterningIndex<std::string> interned_log_message_bodies_; InterningIndex<TypeList<std::string>, SizeList<128>>
interned_log_message_bodies_;
static std::atomic<uint32_t> incremental_state_reset_id_; static std::atomic<uint32_t> incremental_state_reset_id_;
......
...@@ -78,14 +78,18 @@ class COMPONENT_EXPORT(TRACING_CPP) TracingSamplerProfiler { ...@@ -78,14 +78,18 @@ class COMPONENT_EXPORT(TRACING_CPP) TracingSamplerProfiler {
const base::PlatformThreadId sampled_thread_id_; const base::PlatformThreadId sampled_thread_id_;
base::Lock trace_writer_lock_; base::Lock trace_writer_lock_;
std::unique_ptr<perfetto::TraceWriter> trace_writer_; std::unique_ptr<perfetto::TraceWriter> trace_writer_;
InterningIndex<size_t> interned_callstacks_{1000}; InterningIndex<TypeList<size_t>, SizeList<1024>> interned_callstacks_{};
InterningIndex<std::pair<std::string, std::string>, InterningIndex<TypeList<std::pair<std::string, std::string>,
std::pair<uintptr_t, std::string>> std::pair<uintptr_t, std::string>>,
interned_frames_{1000, 1000}; SizeList<1024, 1024>>
InterningIndex<std::string> interned_frame_names_{1000}; interned_frames_{};
InterningIndex<std::string> interned_module_names_{1000}; InterningIndex<TypeList<std::string>, SizeList<1024>>
InterningIndex<std::string> interned_module_ids_{1000}; interned_frame_names_{};
InterningIndex<uintptr_t> interned_modules_{1000}; InterningIndex<TypeList<std::string>, SizeList<1024>>
interned_module_names_{};
InterningIndex<TypeList<std::string>, SizeList<1024>>
interned_module_ids_{};
InterningIndex<TypeList<uintptr_t>, SizeList<1024>> interned_modules_{};
bool reset_incremental_state_ = true; bool reset_incremental_state_ = true;
uint32_t last_incremental_state_reset_id_ = 0; uint32_t last_incremental_state_reset_id_ = 0;
int32_t last_emitted_process_priority_ = -1; int32_t last_emitted_process_priority_ = -1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment