Commit 60a4e46b authored by Eric Seckler's avatar Eric Seckler Committed by Commit Bot

Revert "Improve InterningIndex performance by switching to std::array."

This reverts commit 32fac919.

Reason for revert: crbug.com/1012218

Original change's description:
> Improve InterningIndex performance by switching to std::array.
>
> Instead of MRUCache which uses a sorted map under the hood. Use two
> std:arrays, one to store all the keys (very cache friendly when scanning
> them). And then immediately jump to the value in the other array.
>
> This leads to a ~9% speed up (5 to 13% at the 95% confidence). See the
> "two_arrays tracing/tipOfTreeTracing" sheet in
> https://docs.google.com/spreadsheets/d/1HIiyUQa7d8eLfMBBBm_7V60kqNABYoFbHpKdpwnYu78/
>
> Total tracing overhead is ~59.8% (52% to 67% at the 95% confidence). See the
> "two_arrays tracing/tipOfTreeNotTracing" sheet in the doc above.
>
> Bug: 1007611
> Change-Id: Ie2584e0e3a13e34097021fd128387cfa4fabaeed
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1844822
> Commit-Queue: Stephen Nusko <nuskos@chromium.org>
> Reviewed-by: Stephen Nusko <nuskos@chromium.org>
> Reviewed-by: Eric Seckler <eseckler@chromium.org>
> Auto-Submit: Stephen Nusko <nuskos@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#703377}

TBR=eseckler@chromium.org,nuskos@chromium.org

# Not skipping CQ checks because original CL landed > 1 day ago.

Bug: 1007611, 1012218
Change-Id: Ie5d8298a2a62e4856d1cd16fec7dae6bf1e73bf5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1852084Reviewed-by: default avatarEric Seckler <eseckler@chromium.org>
Commit-Queue: Eric Seckler <eseckler@chromium.org>
Cr-Commit-Position: refs/heads/master@{#704522}
parent 64b82d94
...@@ -5,11 +5,11 @@ ...@@ -5,11 +5,11 @@
#ifndef SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_ #ifndef SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_
#define SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_ #define SERVICES_TRACING_PUBLIC_CPP_PERFETTO_INTERNING_INDEX_H_
#include <array>
#include <cstdint> #include <cstdint>
#include <tuple> #include <tuple>
#include "base/component_export.h" #include "base/component_export.h"
#include "base/containers/mru_cache.h"
namespace tracing { namespace tracing {
...@@ -17,7 +17,7 @@ namespace tracing { ...@@ -17,7 +17,7 @@ namespace tracing {
using InterningID = uint32_t; using InterningID = uint32_t;
struct COMPONENT_EXPORT(TRACING_CPP) InterningIndexEntry { struct COMPONENT_EXPORT(TRACING_CPP) InterningIndexEntry {
InterningID id = 0; InterningID id;
// Whether the entry was emitted since the last reset of emitted state. If // Whether the entry was emitted since the last reset of emitted state. If
// |false|, the sink should (re)emit the entry in the current TracePacket. // |false|, the sink should (re)emit the entry in the current TracePacket.
...@@ -25,80 +25,29 @@ struct COMPONENT_EXPORT(TRACING_CPP) InterningIndexEntry { ...@@ -25,80 +25,29 @@ struct COMPONENT_EXPORT(TRACING_CPP) InterningIndexEntry {
// We don't remove entries on reset of emitted state, so that we can continue // We don't remove entries on reset of emitted state, so that we can continue
// to use their original IDs and avoid unnecessarily incrementing the ID // to use their original IDs and avoid unnecessarily incrementing the ID
// counter. // counter.
bool was_emitted = false; bool was_emitted;
}; };
// Interning index that associates interned values with interning IDs. It can // Interning index that associates interned values with interning IDs. It can
// track entries of different types within the same ID space, e.g. so that both // track entries of different types within the same ID space, e.g. so that both
// copied strings and pointers to static strings can co-exist in the same index. // copied strings and pointers to static strings can co-exist in the same index.
// // Uses base::MRUCaches to track the ID associations while enforcing an upper
// The index will cache up to |N| values per ValueType after which it will start // bound on the index size.
// replacing them in a FIFO basis. N must be a power of 2 for performance template <typename... ValueTypes>
// reasons.
template <size_t N, typename... ValueTypes>
class COMPONENT_EXPORT(TRACING_CPP) InterningIndex { class COMPONENT_EXPORT(TRACING_CPP) InterningIndex {
public: public:
// IndexCache is just a pair of std::arrays which arg kept in sync with each
// other. This has an advantage over a std::array<pairs> since we can load a
// bunch of keys to search in one cache line without loading values.
template <typename ValueType> template <typename ValueType>
class IndexCache { using IndexCache = base::MRUCache<ValueType, InterningIndexEntry>;
public:
IndexCache() {
// Assert that N is a power of two. This allows the "% N" in Insert() to
// compile to a bunch of bit shifts which improves performance over an
// arbitrary modulo division.
static_assert(
N && ((N & (N - 1)) == 0),
"InterningIndex requires that the cache size be a power of 2.");
}
void Clear() {
for (auto& val : keys_) {
val = ValueType{};
}
}
typename std::array<InterningIndexEntry, N>::iterator Find(
const ValueType& value) {
auto it = std::find(keys_.begin(), keys_.end(), value);
// If the find above returns it == keys_.end(), then (it - keys_.begin())
// will equal keys_.size() which is equal to values_.size(). So
// values_.begin() + values_size() will be values_.end() and we've
// returned the correct value. This saves us checking a conditional in
// this function.
return values_.begin() + (it - keys_.begin());
}
typename std::array<InterningIndexEntry, N>::iterator Insert(
const ValueType& key,
InterningIndexEntry&& value) {
size_t new_position = current_index_++ % N;
keys_[new_position] = key;
values_[new_position] = std::move(value);
return values_.begin() + new_position;
}
typename std::array<InterningIndexEntry, N>::iterator begin() {
return values_.begin();
}
typename std::array<InterningIndexEntry, N>::iterator end() {
return values_.end();
}
private:
size_t current_index_ = 0;
std::array<ValueType, N> keys_{{}};
std::array<InterningIndexEntry, N> values_{{}};
};
// Construct a new index with caches for each of the ValueTypes. Every cache // Construct a new index with caches for each of the ValueTypes. The cache
// is |N| elements. // size for the n-th ValueType will be limited to max_entry_counts[n] entries.
// //
// For example, to construct an index containing at most 1024 char* pointers // For example, to construct an index containing at most 1000 char* pointers
// and 1024 std::string objects: // and 100 std::string objects:
// InterningIndex<1024, char*, std::string> index; // InterningIndex<char*, std::string> index(1000, 100);
InterningIndex() = default; template <typename... SizeType>
InterningIndex(SizeType... max_entry_counts)
: entry_caches_(max_entry_counts...) {}
// Returns the entry for the given interned |value|, adding it to the index if // Returns the entry for the given interned |value|, adding it to the index if
// it didn't exist previously or was evicted from the index. Entries may be // it didn't exist previously or was evicted from the index. Entries may be
...@@ -111,14 +60,14 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex { ...@@ -111,14 +60,14 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex {
InterningIndexEntry LookupOrAdd(const ValueType& value) { InterningIndexEntry LookupOrAdd(const ValueType& value) {
IndexCache<ValueType>& cache = IndexCache<ValueType>& cache =
std::get<IndexCache<ValueType>>(entry_caches_); std::get<IndexCache<ValueType>>(entry_caches_);
auto it = cache.Find(value); auto it = cache.Get(value);
if (it == cache.end()) { if (it == cache.end()) {
it = cache.Insert(value, InterningIndexEntry{next_id_++, false}); it = cache.Put(value, InterningIndexEntry{next_id_++, false});
} }
bool was_emitted = it->was_emitted; bool was_emitted = it->second.was_emitted;
// The caller will (re)emit the entry, so mark it as emitted. // The caller will (re)emit the entry, so mark it as emitted.
it->was_emitted = true; it->second.was_emitted = true;
return InterningIndexEntry{it->id, was_emitted}; return InterningIndexEntry{it->second.id, was_emitted};
} }
// Marks all entries as "not emitted", so that they will be reemitted when // Marks all entries as "not emitted", so that they will be reemitted when
...@@ -154,7 +103,7 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex { ...@@ -154,7 +103,7 @@ class COMPONENT_EXPORT(TRACING_CPP) InterningIndex {
cache.Clear(); cache.Clear();
} else { } else {
for (auto& entry : cache) { for (auto& entry : cache) {
entry.was_emitted = false; entry.second.was_emitted = false;
} }
} }
} }
......
...@@ -169,6 +169,12 @@ TrackEventThreadLocalEventSink::TrackEventThreadLocalEventSink( ...@@ -169,6 +169,12 @@ TrackEventThreadLocalEventSink::TrackEventThreadLocalEventSink(
: ThreadLocalEventSink(std::move(trace_writer), : ThreadLocalEventSink(std::move(trace_writer),
session_id, session_id,
disable_interning), disable_interning),
// TODO(eseckler): Tune these values experimentally.
interned_event_categories_(1000),
interned_event_names_(1000, 100),
interned_annotation_names_(1000, 100),
interned_source_locations_(1000),
interned_log_message_bodies_(100),
process_id_(TraceLog::GetInstance()->process_id()), process_id_(TraceLog::GetInstance()->process_id()),
thread_id_(static_cast<int>(base::PlatformThread::CurrentId())), thread_id_(static_cast<int>(base::PlatformThread::CurrentId())),
privacy_filtering_enabled_(proto_writer_filtering_enabled) { privacy_filtering_enabled_(proto_writer_filtering_enabled) {
......
...@@ -69,13 +69,12 @@ class COMPONENT_EXPORT(TRACING_CPP) TrackEventThreadLocalEventSink ...@@ -69,13 +69,12 @@ class COMPONENT_EXPORT(TRACING_CPP) TrackEventThreadLocalEventSink
// TODO(eseckler): Make it possible to register new indexes for use from // TODO(eseckler): Make it possible to register new indexes for use from
// TRACE_EVENT macros. // TRACE_EVENT macros.
// TODO(eseckler): Tune the cache sizes experimentally. InterningIndex<const char*> interned_event_categories_;
InterningIndex<1024, const char*> interned_event_categories_; InterningIndex<const char*, std::string> interned_event_names_;
InterningIndex<1024, const char*, std::string> interned_event_names_; InterningIndex<const char*, std::string> interned_annotation_names_;
InterningIndex<1024, const char*, std::string> interned_annotation_names_; InterningIndex<std::tuple<const char*, const char*, int>>
InterningIndex<1024, std::tuple<const char*, const char*, int>>
interned_source_locations_; interned_source_locations_;
InterningIndex<128, std::string> interned_log_message_bodies_; InterningIndex<std::string> interned_log_message_bodies_;
static std::atomic<uint32_t> incremental_state_reset_id_; static std::atomic<uint32_t> incremental_state_reset_id_;
......
...@@ -78,15 +78,14 @@ class COMPONENT_EXPORT(TRACING_CPP) TracingSamplerProfiler { ...@@ -78,15 +78,14 @@ class COMPONENT_EXPORT(TRACING_CPP) TracingSamplerProfiler {
const base::PlatformThreadId sampled_thread_id_; const base::PlatformThreadId sampled_thread_id_;
base::Lock trace_writer_lock_; base::Lock trace_writer_lock_;
std::unique_ptr<perfetto::TraceWriter> trace_writer_; std::unique_ptr<perfetto::TraceWriter> trace_writer_;
InterningIndex<1024, size_t> interned_callstacks_{}; InterningIndex<size_t> interned_callstacks_{1000};
InterningIndex<1024, InterningIndex<std::pair<std::string, std::string>,
std::pair<std::string, std::string>,
std::pair<uintptr_t, std::string>> std::pair<uintptr_t, std::string>>
interned_frames_{}; interned_frames_{1000, 1000};
InterningIndex<1024, std::string> interned_frame_names_{}; InterningIndex<std::string> interned_frame_names_{1000};
InterningIndex<1024, std::string> interned_module_names_{}; InterningIndex<std::string> interned_module_names_{1000};
InterningIndex<1024, std::string> interned_module_ids_{}; InterningIndex<std::string> interned_module_ids_{1000};
InterningIndex<1024, uintptr_t> interned_modules_{}; InterningIndex<uintptr_t> interned_modules_{1000};
bool reset_incremental_state_ = true; bool reset_incremental_state_ = true;
uint32_t last_incremental_state_reset_id_ = 0; uint32_t last_incremental_state_reset_id_ = 0;
int32_t last_emitted_process_priority_ = -1; int32_t last_emitted_process_priority_ = -1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment