Commit 0bed5b24 authored by Alexei Filippov, committed by Commit Bot

[sampling heap profiler] Implement lock-free hash set.

It turned out that std::unordered_set does not support concurrent access,
even to distinct items in the container.

This change adds a custom implementation of a hash set with keys of void* type.
It supports lock-free concurrent Insert, Remove, and Contains operations.
Contains is made as fast as possible, since it is on the hot path of the
memory allocation hooks.
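
A minimal usage sketch (illustration only, not part of this change), based on
the API declared in lock_free_address_hash_set.h below:

  #include "base/sampling_heap_profiler/lock_free_address_hash_set.h"

  void Example() {
    base::LockFreeAddressHashSet set(64);  // Bucket count must be a power of 2.
    int dummy;
    void* address = &dummy;
    set.Insert(address);        // e.g. from the allocation hook.
    if (set.Contains(address))  // Hot path: lock-free lookup.
      set.Remove(address);      // e.g. from the free hook.
  }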

BUG=854399

Change-Id: Ia53eeff08bafc363df2aef8ac5cdd5212f124452
Reviewed-on: https://chromium-review.googlesource.com/1121101
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: Andrey Kosyakov <caseq@chromium.org>
Commit-Queue: Alexei Filippov <alph@chromium.org>
Cr-Commit-Position: refs/heads/master@{#572446}
parent ac9d7806
...@@ -714,6 +714,8 @@ jumbo_component("base") {
     "rand_util_win.cc",
     "run_loop.cc",
     "run_loop.h",
+    "sampling_heap_profiler/lock_free_address_hash_set.cc",
+    "sampling_heap_profiler/lock_free_address_hash_set.h",
     "sampling_heap_profiler/sampling_heap_profiler.cc",
     "sampling_heap_profiler/sampling_heap_profiler.h",
     "scoped_clear_errno.h",
...@@ -2317,6 +2319,7 @@ test("base_unittests") {
     "rand_util_unittest.cc",
     "run_loop_unittest.cc",
     "safe_numerics_unittest.cc",
+    "sampling_heap_profiler/lock_free_address_hash_set_unittest.cc",
     "scoped_clear_errno_unittest.cc",
     "scoped_generic_unittest.cc",
     "scoped_native_library_unittest.cc",
......
...@@ -4,63 +4,55 @@
 // To benchmark a specific version of Chrome set the CHROME_PATH environment
 // variable, e.g.:
-// $ CHROME_PATH=~/chromium/out/Release/chrome node benchmark-octane.js
+// $ CHROME_PATH=~/chromium/src/out/Release/chrome node benchmark-octane.js
 const puppeteer = require('puppeteer');
 
+let base_score;
+
 async function runOctane(samplingRate) {
   const args = ['--enable-devtools-experiments'];
   if (samplingRate)
     args.push(`--sampling-heap-profiler=${samplingRate}`);
-  const browser = await puppeteer.launch({
-      executablePath: process.env.CHROME_PATH, args, headless: true});
-  try {
-    const page = await browser.newPage();
-    await page.goto('https://chromium.github.io/octane/');
-    await page.waitForSelector('#run-octane');  // Just in case.
-    await page.click('#run-octane');
-    const scoreDiv = await page.waitForSelector('#main-banner:only-child',
-        {timeout: 120000});
-    const scoreText = await page.evaluate(e => e.innerText, scoreDiv);
-    const match = /Score:\s*(\d+)/.exec(scoreText);
-    if (match.length < 2) {
-      console.log(`Error: cannot parse score from '${scoreText}'`);
-      return 0;
-    }
-    return parseInt(match[1]);
-  } finally {
-    await browser.close();
+  while (true) {
+    let browser;
+    try {
+      browser = await puppeteer.launch({
+          executablePath: process.env.CHROME_PATH, args, headless: true});
+      const page = await browser.newPage();
+      await page.goto('https://chromium.github.io/octane/');
+      await page.waitForSelector('#run-octane');  // Just in case.
+      await page.click('#run-octane');
+      const scoreDiv = await page.waitForSelector('#main-banner:only-child',
+          {timeout: 120000});
+      const scoreText = await page.evaluate(e => e.innerText, scoreDiv);
+      const match = /Score:\s*(\d+)/.exec(scoreText);
+      if (match.length < 2)
+        continue;
+      return parseInt(match[1]);
+    } finally {
+      if (browser)
+        await browser.close();
+    }
   }
 }
 
-async function makeRuns(rate) {
-  console.log(`testing rate: ${rate}`);
-  let sum = 0;
-  let sum2 = 0;
-  const n = 10;
-  for (let i = 0; i < n; ++i) {
-    const score = await runOctane(rate);
-    console.log(score);
-    sum += score;
-    sum2 += score * score;
-  }
-  const mean = sum / n;
-  const stdev = Math.sqrt(sum2 / n - mean * mean);
-  console.log(`rate: ${rate} mean: ${mean} stdev: ${stdev}`);
-  return mean;
+async function makeRuns(rates) {
+  const scores = [];
+  for (const rate of rates)
+    scores.push(await runOctane(rate));
+  console.log(scores.join('\t'));
 }
 
 async function main() {
   console.log(`Using ${process.env.CHROME_PATH || puppeteer.executablePath()}`);
-  const base_score = await makeRuns(0);
-  for (let rate = 32; rate <= 2048; rate *= 2) {
-    const score = await makeRuns(rate);
-    console.log(`slowdown: ${(100 - score / base_score * 100).toFixed(2)}%\n`);
-  }
+  const rates = [0];
+  for (let rate = 8; rate <= 2048; rate *= 2)
+    rates.push(rate);
+  console.log('Rates [KB]:');
+  console.log(rates.join('\t'));
+  console.log('='.repeat(rates.length * 8));
+  for (let i = 0; i < 100; ++i)
+    await makeRuns(rates);
 }
 
 main();
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include <limits>
#include "base/bits.h"
namespace base {
LockFreeAddressHashSet::LockFreeAddressHashSet(size_t buckets_count)
: buckets_(buckets_count), bucket_mask_(buckets_count - 1) {
DCHECK(bits::IsPowerOfTwo(buckets_count));
DCHECK(bucket_mask_ <= std::numeric_limits<uint32_t>::max());
}
LockFreeAddressHashSet::~LockFreeAddressHashSet() {
for (subtle::AtomicWord bucket : buckets_) {
Node* node = reinterpret_cast<Node*>(bucket);
while (node) {
Node* next = reinterpret_cast<Node*>(node->next);
delete node;
node = next;
}
}
}
void LockFreeAddressHashSet::Insert(void* key) {
// TODO(alph): Replace with DCHECK.
CHECK(key != nullptr);
CHECK(!Contains(key));
subtle::NoBarrier_AtomicIncrement(&size_, 1);
uint32_t h = Hash(key);
subtle::AtomicWord* bucket_ptr = &buckets_[h & bucket_mask_];
Node* node = reinterpret_cast<Node*>(subtle::NoBarrier_Load(bucket_ptr));
// First iterate over the bucket nodes and try to reuse an empty one if found.
for (; node != nullptr; node = next_node(node)) {
if (subtle::NoBarrier_CompareAndSwap(
&node->key, 0, reinterpret_cast<subtle::AtomicWord>(key)) == 0) {
return;
}
}
DCHECK(node == nullptr);
// There are no empty nodes to reuse in the bucket.
// Create a new node and prepend it to the list.
Node* new_node = new Node(key);
subtle::AtomicWord current_head = subtle::NoBarrier_Load(bucket_ptr);
subtle::AtomicWord expected_head;
do {
subtle::NoBarrier_Store(&new_node->next, current_head);
expected_head = current_head;
current_head = subtle::Release_CompareAndSwap(
bucket_ptr, current_head,
reinterpret_cast<subtle::AtomicWord>(new_node));
} while (current_head != expected_head);
}
void LockFreeAddressHashSet::Copy(const LockFreeAddressHashSet& other) {
DCHECK_EQ(0u, size());
for (subtle::AtomicWord bucket : other.buckets_) {
for (Node* node = reinterpret_cast<Node*>(bucket); node;
node = next_node(node)) {
subtle::AtomicWord k = subtle::NoBarrier_Load(&node->key);
if (k)
Insert(reinterpret_cast<void*>(k));
}
}
}
} // namespace base
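Insert() above publishes a new node by retrying a compare-and-swap on the
bucket head until it succeeds. Below is a standalone sketch of that pattern,
written with std::atomic purely for illustration; ExampleNode and Prepend are
hypothetical names, not part of this CL.

// Illustration only: the same lock-free "prepend to a bucket list" retry loop
// that Insert() uses above, expressed with std::atomic instead of
// base/atomicops.
#include <atomic>

struct ExampleNode {
  void* key = nullptr;
  ExampleNode* next = nullptr;
};

void Prepend(std::atomic<ExampleNode*>& bucket_head, ExampleNode* new_node) {
  ExampleNode* observed = bucket_head.load(std::memory_order_relaxed);
  do {
    // Link the new node in front of the head we last observed.
    new_node->next = observed;
    // On failure, compare_exchange_weak reloads |observed| with the current
    // head, so the loop retries against the fresh value, just like the
    // Release_CompareAndSwap loop in Insert().
  } while (!bucket_head.compare_exchange_weak(observed, new_node,
                                              std::memory_order_release,
                                              std::memory_order_relaxed));
}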
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_SAMPLING_HEAP_PROFILER_LOCK_FREE_ADDRESS_HASH_SET_H_
#define BASE_SAMPLING_HEAP_PROFILER_LOCK_FREE_ADDRESS_HASH_SET_H_
#include <cstdint>
#include <vector>
#include "base/atomicops.h"
#include "base/logging.h"
namespace base {
// A hash set container that provides lock-free versions of the
// |Insert|, |Remove|, and |Contains| operations.
// It does not support concurrent write operations |Insert| and |Remove|
// over the same key. Concurrent writes of distinct keys are ok.
// The |Contains| method can be executed concurrently with other |Insert|,
// |Remove|, or |Contains| operations, even over the same key.
// However, note that the result of executing |Contains| concurrently with
// |Insert| or |Remove| over the same key is racy.
//
// The hash set never rehashes, so the number of buckets stays the same
// for the lifetime of the set.
//
// Internally the hash set is implemented as a vector of N buckets
// (N has to be a power of 2). Each bucket holds a singly-linked list of
// nodes, each corresponding to a key.
// It is not possible to truly delete nodes from the list, as there might
// be concurrent reads iterating over the node. The |Remove| operation
// just marks the node as empty by placing nullptr into its key field.
// Subsequent |Insert| operations may reuse empty nodes when possible.
//
// The structure of the hash set with N buckets is the following:
// 0: {*}--> {key1,*}--> {key2,*}--> NULL
// 1: {*}--> NULL
// 2: {*}--> {NULL,*}--> {key3,*}--> {key4,*}--> NULL
// ...
// N-1: {*}--> {keyM,*}--> NULL
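// For example (an illustration added to the diagram above, not additional
// semantics): Remove(key3) only empties its node rather than unlinking it:
// 2: {*}--> {NULL,*}--> {NULL,*}--> {key4,*}--> NULL
// and a later Insert(key5) that hashes into bucket 2 reuses the first empty
// node instead of allocating a new one:
// 2: {*}--> {key5,*}--> {NULL,*}--> {key4,*}--> NULL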
class BASE_EXPORT LockFreeAddressHashSet {
public:
explicit LockFreeAddressHashSet(size_t buckets_count);
~LockFreeAddressHashSet();
// Checks if the |key| is in the set. Can be executed concurrently with
// |Insert|, |Remove|, and |Contains| operations.
bool Contains(void* key) const;
// Removes the |key| from the set. The key must be present in the set before
// the invocation.
// Can be concurrent with other |Insert| and |Remove| executions, provided
// they operate over distinct keys.
// Concurrent |Insert| or |Remove| executions over the same key are not
// supported.
void Remove(void* key);
// Inserts the |key| into the set. The key must not be present in the set
// before the invocation.
// Can be concurrent with other |Insert| and |Remove| executions, provided
// they operate over distinct keys.
// Concurrent |Insert| or |Remove| executions over the same key are not
// supported.
void Insert(void* key);
// Copies contents of |other| set into the current set. The current set
// must be empty before the call.
// The operation cannot be executed concurrently with any other methods.
void Copy(const LockFreeAddressHashSet& other);
size_t buckets_count() const { return buckets_.size(); }
size_t size() const {
return static_cast<size_t>(subtle::NoBarrier_Load(&size_));
}
// Returns the average bucket utilization.
float load_factor() const { return 1.f * size() / buckets_.size(); }
private:
friend class LockFreeAddressHashSetTest;
struct Node {
Node() : key(0), next(0) {}
explicit Node(void* key);
subtle::AtomicWord key;
subtle::AtomicWord next;
};
static uint32_t Hash(void* key);
Node* FindNode(void* key) const;
Node* Bucket(void* key) const;
static Node* next_node(Node* node) {
return reinterpret_cast<Node*>(subtle::NoBarrier_Load(&node->next));
}
std::vector<subtle::AtomicWord> buckets_;
size_t bucket_mask_;
subtle::AtomicWord size_ = 0;
};
inline LockFreeAddressHashSet::Node::Node(void* a_key) {
subtle::NoBarrier_Store(&key, reinterpret_cast<subtle::AtomicWord>(a_key));
subtle::NoBarrier_Store(&next, 0);
}
inline bool LockFreeAddressHashSet::Contains(void* key) const {
return FindNode(key) != nullptr;
}
inline void LockFreeAddressHashSet::Remove(void* key) {
Node* node = FindNode(key);
// TODO(alph): Replace with DCHECK.
CHECK(node != nullptr);
// We can never delete the node, nor detach it from the current bucket
// as there may always be another thread currently iterating over it.
// Instead we just mark it as empty, so |Insert| can reuse it later.
subtle::NoBarrier_Store(&node->key, 0);
subtle::NoBarrier_AtomicIncrement(&size_, -1);
}
inline LockFreeAddressHashSet::Node* LockFreeAddressHashSet::FindNode(
void* key) const {
for (Node* node = Bucket(key); node != nullptr; node = next_node(node)) {
void* k = reinterpret_cast<void*>(subtle::NoBarrier_Load(&node->key));
if (k == key)
return node;
}
return nullptr;
}
inline LockFreeAddressHashSet::Node* LockFreeAddressHashSet::Bucket(
void* key) const {
// TODO(alph): Replace with DCHECK.
CHECK(key != nullptr);
uint32_t h = Hash(key);
return reinterpret_cast<Node*>(
subtle::NoBarrier_Load(&buckets_[h & bucket_mask_]));
}
// static
inline uint32_t LockFreeAddressHashSet::Hash(void* key) {
// A simple fast hash function for addresses.
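  // It multiplies the address by a 64-bit constant with a random bit pattern
  // and takes the upper 32 bits of the product (multiplicative hashing), so
  // the mostly-aligned, low-entropy pointer bits still spread across the
  // bucket index range.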
uint64_t k = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(key));
uint64_t random_bits = 0x4bfdb9df5a6f243bull;
return static_cast<uint32_t>((k * random_bits) >> 32);
}
} // namespace base
#endif // BASE_SAMPLING_HEAP_PROFILER_LOCK_FREE_ADDRESS_HASH_SET_H_
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include <stdlib.h>
#include <cinttypes>
#include "base/allocator/allocator_shim.h"
#include "base/debug/alias.h"
#include "base/threading/simple_thread.h"
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace base {
class LockFreeAddressHashSetTest : public ::testing::Test {
public:
static bool Subset(const LockFreeAddressHashSet& superset,
const LockFreeAddressHashSet& subset) {
for (subtle::AtomicWord bucket : subset.buckets_) {
for (LockFreeAddressHashSet::Node* node =
reinterpret_cast<LockFreeAddressHashSet::Node*>(bucket);
node; node = LockFreeAddressHashSet::next_node(node)) {
void* key = reinterpret_cast<void*>(node->key);
if (key && !superset.Contains(key))
return false;
}
}
return true;
}
static bool Equals(const LockFreeAddressHashSet& set1,
const LockFreeAddressHashSet& set2) {
return Subset(set1, set2) && Subset(set2, set1);
}
static size_t BucketSize(const LockFreeAddressHashSet& set, size_t bucket) {
size_t count = 0;
LockFreeAddressHashSet::Node* node =
reinterpret_cast<LockFreeAddressHashSet::Node*>(set.buckets_[bucket]);
for (; node; node = set.next_node(node))
++count;
return count;
}
};
namespace {
TEST_F(LockFreeAddressHashSetTest, EmptySet) {
LockFreeAddressHashSet set(8);
EXPECT_EQ(size_t(0), set.size());
EXPECT_EQ(size_t(8), set.buckets_count());
EXPECT_EQ(0., set.load_factor());
EXPECT_FALSE(set.Contains(&set));
}
TEST_F(LockFreeAddressHashSetTest, BasicOperations) {
LockFreeAddressHashSet set(8);
for (size_t i = 1; i <= 100; ++i) {
void* ptr = reinterpret_cast<void*>(i);
set.Insert(ptr);
EXPECT_EQ(i, set.size());
EXPECT_TRUE(set.Contains(ptr));
}
size_t size = 100;
EXPECT_EQ(size, set.size());
EXPECT_EQ(size_t(8), set.buckets_count());
EXPECT_EQ(size / 8., set.load_factor());
for (size_t i = 99; i >= 3; i -= 3) {
void* ptr = reinterpret_cast<void*>(i);
set.Remove(ptr);
EXPECT_EQ(--size, set.size());
EXPECT_FALSE(set.Contains(ptr));
}
// Removed every 3rd value (33 total) from the set; 67 remain.
EXPECT_EQ(size_t(67), set.size());
for (size_t i = 1; i <= 100; ++i) {
void* ptr = reinterpret_cast<void*>(i);
EXPECT_EQ(i % 3 != 0, set.Contains(ptr));
}
}
TEST_F(LockFreeAddressHashSetTest, Copy) {
LockFreeAddressHashSet set(16);
for (size_t i = 1000; i <= 16000; i += 1000) {
void* ptr = reinterpret_cast<void*>(i);
set.Insert(ptr);
}
LockFreeAddressHashSet set2(4);
LockFreeAddressHashSet set3(64);
set2.Copy(set);
set3.Copy(set);
EXPECT_TRUE(Equals(set, set2));
EXPECT_TRUE(Equals(set, set3));
EXPECT_TRUE(Equals(set2, set3));
set.Insert(reinterpret_cast<void*>(42));
EXPECT_FALSE(Equals(set, set2));
EXPECT_FALSE(Equals(set, set3));
EXPECT_TRUE(Equals(set2, set3));
EXPECT_TRUE(Subset(set, set2));
EXPECT_FALSE(Subset(set2, set));
}
class WriterThread : public SimpleThread {
public:
WriterThread(LockFreeAddressHashSet* set, subtle::Atomic32* cancel)
: SimpleThread("ReaderThread"), set_(set), cancel_(cancel) {}
void Run() override {
for (size_t value = 42; !subtle::Acquire_Load(cancel_); ++value) {
void* ptr = reinterpret_cast<void*>(value);
set_->Insert(ptr);
EXPECT_TRUE(set_->Contains(ptr));
set_->Remove(ptr);
EXPECT_FALSE(set_->Contains(ptr));
}
// Leave a key for the reader to test.
set_->Insert(reinterpret_cast<void*>(0x1337));
}
private:
LockFreeAddressHashSet* set_;
subtle::Atomic32* cancel_;
};
#if defined(THREAD_SANITIZER)
#define DISABLE_ON_TSAN(test_name) DISABLED_##test_name
#else
#define DISABLE_ON_TSAN(test_name) test_name
#endif // defined(THREAD_SANITIZER)
TEST_F(LockFreeAddressHashSetTest, DISABLE_ON_TSAN(ConcurrentAccess)) {
// The purpose of this test is to make sure adding/removing keys concurrently
// does not disrupt the state of other keys.
LockFreeAddressHashSet set(16);
subtle::Atomic32 cancel = 0;
auto thread = std::make_unique<WriterThread>(&set, &cancel);
thread->Start();
for (size_t i = 1; i <= 20; ++i)
set.Insert(reinterpret_cast<void*>(i));
// Remove some items to test empty nodes.
for (size_t i = 16; i <= 20; ++i)
set.Remove(reinterpret_cast<void*>(i));
for (size_t k = 0; k < 100000; ++k) {
for (size_t i = 1; i <= 30; ++i) {
EXPECT_EQ(i < 16, set.Contains(reinterpret_cast<void*>(i)));
}
}
subtle::Release_Store(&cancel, 1);
thread->Join();
EXPECT_TRUE(set.Contains(reinterpret_cast<void*>(0x1337)));
EXPECT_FALSE(set.Contains(reinterpret_cast<void*>(0xbadf00d)));
}
TEST_F(LockFreeAddressHashSetTest, BucketsUsage) {
// Test the uniformity of buckets usage.
size_t count = 10000;
LockFreeAddressHashSet set(16);
for (size_t i = 0; i < count; ++i)
set.Insert(reinterpret_cast<void*>(0x10000 + 0x10 * i));
size_t average_per_bucket = count / set.buckets_count();
for (size_t i = 0; i < set.buckets_count(); ++i) {
size_t usage = BucketSize(set, i);
EXPECT_LT(average_per_bucket * 95 / 100, usage);
EXPECT_GT(average_per_bucket * 105 / 100, usage);
}
}
} // namespace
} // namespace base
...@@ -17,6 +17,7 @@
 #include "base/no_destructor.h"
 #include "base/partition_alloc_buildflags.h"
 #include "base/rand_util.h"
+#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
 #include "base/threading/thread_local_storage.h"
 #include "build/build_config.h"
...@@ -40,8 +41,8 @@ bool g_deterministic;
 // A positive value if profiling is running, otherwise it's zero.
 Atomic32 g_running;
-// Pointer to the current |SamplingHeapProfiler::SamplesMap|.
-AtomicWord g_current_samples_map;
+// Pointer to the current |LockFreeAddressHashSet|.
+AtomicWord g_sampled_addresses_set;
 // Sampling interval parameter, the mean value for intervals between samples.
 AtomicWord g_sampling_interval = kDefaultSamplingIntervalBytes;
...@@ -174,10 +175,11 @@ SamplingHeapProfiler* SamplingHeapProfiler::instance_;
 SamplingHeapProfiler::SamplingHeapProfiler() {
   instance_ = this;
-  auto samples_map = std::make_unique<SamplesMap>(64);
-  base::subtle::NoBarrier_Store(
-      &g_current_samples_map, reinterpret_cast<AtomicWord>(samples_map.get()));
-  sample_maps_.push(std::move(samples_map));
+  auto sampled_addresses = std::make_unique<LockFreeAddressHashSet>(64);
+  base::subtle::NoBarrier_Store(
+      &g_sampled_addresses_set,
+      reinterpret_cast<AtomicWord>(sampled_addresses.get()));
+  sampled_addresses_stack_.push(std::move(sampled_addresses));
 }
 
 // static
...@@ -333,15 +335,21 @@ void SamplingHeapProfiler::DoRecordAlloc(size_t total_allocated,
     RecordStackTrace(&sample, skip_frames);
     for (auto* observer : observers_)
       observer->SampleAdded(sample.ordinal, size, total_allocated);
-    EnsureNoRehashingMap().emplace(address, std::move(sample));
+    samples_.emplace(address, std::move(sample));
+    // TODO(alph): Sometimes RecordAlloc is called twice in a row without
+    // a RecordFree in between. Investigate it.
+    if (!sampled_addresses_set().Contains(address))
+      sampled_addresses_set().Insert(address);
+    BalanceAddressesHashSet();
   }
   entered_.Set(false);
 }
 
 // static
 void SamplingHeapProfiler::RecordFree(void* address) {
-  const SamplesMap& samples = SamplingHeapProfiler::samples();
-  if (UNLIKELY(samples.find(address) != samples.end()))
+  if (UNLIKELY(address == nullptr))
+    return;
+  if (UNLIKELY(sampled_addresses_set().Contains(address)))
     instance_->DoRecordFree(address);
 }
...@@ -353,43 +361,43 @@ void SamplingHeapProfiler::DoRecordFree(void* address) {
   entered_.Set(true);
   {
     base::AutoLock lock(mutex_);
-    SamplesMap& samples = this->samples();
-    auto it = samples.find(address);
-    CHECK(it != samples.end());
+    auto it = samples_.find(address);
+    CHECK(it != samples_.end());
     for (auto* observer : observers_)
       observer->SampleRemoved(it->second.ordinal);
-    samples.erase(it);
+    samples_.erase(it);
+    sampled_addresses_set().Remove(address);
   }
   entered_.Set(false);
 }
 
-SamplingHeapProfiler::SamplesMap& SamplingHeapProfiler::EnsureNoRehashingMap() {
-  // The function makes sure we never rehash the current map in place.
-  // Instead if it comes close to the rehashing boundary, we allocate a twice
-  // larger map, copy the samples into it, and atomically switch new readers
-  // to use the new map.
+void SamplingHeapProfiler::BalanceAddressesHashSet() {
+  // If the load_factor of the current addresses hash set becomes higher
+  // than 1, allocate a new set twice as large, copy all the data,
+  // and switch to using it.
+  // No other writes are made to either set during the copy, as it all
+  // happens under the lock.
+  // All the readers continue to use the old set until the atomic switch
+  // takes place.
+  LockFreeAddressHashSet& current_set = sampled_addresses_set();
+  if (current_set.load_factor() < 1)
+    return;
+  auto new_set =
+      std::make_unique<LockFreeAddressHashSet>(current_set.buckets_count() * 2);
+  new_set->Copy(current_set);
+  // Atomically switch all the new readers to the new set.
+  base::subtle::Release_Store(&g_sampled_addresses_set,
+                              reinterpret_cast<AtomicWord>(new_set.get()));
  // We still have to keep all the old maps alive to resolve the theoretical
  // race with readers in |RecordFree| that have already obtained the map,
  // but haven't yet managed to access it.
-  SamplesMap& samples = this->samples();
-  size_t max_items_before_rehash =
-      static_cast<size_t>(samples.bucket_count() * samples.max_load_factor());
-  // Conservatively use 2 instead of 1 to workaround potential rounding errors.
-  bool may_rehash_on_insert = samples.size() + 2 >= max_items_before_rehash;
-  if (!may_rehash_on_insert)
-    return samples;
-  auto new_map = std::make_unique<SamplesMap>(samples.begin(), samples.end(),
-                                              samples.bucket_count() * 2);
-  base::subtle::Release_Store(&g_current_samples_map,
-                              reinterpret_cast<AtomicWord>(new_map.get()));
-  sample_maps_.push(std::move(new_map));
-  return this->samples();
+  sampled_addresses_stack_.push(std::move(new_set));
 }
 
 // static
-SamplingHeapProfiler::SamplesMap& SamplingHeapProfiler::samples() {
-  return *reinterpret_cast<SamplesMap*>(
-      base::subtle::NoBarrier_Load(&g_current_samples_map));
+LockFreeAddressHashSet& SamplingHeapProfiler::sampled_addresses_set() {
+  return *reinterpret_cast<LockFreeAddressHashSet*>(
+      base::subtle::NoBarrier_Load(&g_sampled_addresses_set));
 }
 
 // static
...@@ -432,7 +440,7 @@ std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
   std::vector<Sample> samples;
   {
     base::AutoLock lock(mutex_);
-    for (auto& it : this->samples()) {
+    for (auto& it : samples_) {
       Sample& sample = it.second;
       if (sample.ordinal > profile_id)
         samples.push_back(sample);
......
...@@ -20,6 +20,8 @@ namespace base {
 template <typename T>
 class NoDestructor;
 
+class LockFreeAddressHashSet;
+
 // The class implements sampling profiling of native memory heap.
 // It hooks on base::allocator and base::PartitionAlloc.
 // When started it selects and records allocation samples based on
...@@ -81,8 +83,6 @@
   static SamplingHeapProfiler* GetInstance();
 
  private:
-  using SamplesMap = std::unordered_map<void*, Sample>;
-
   SamplingHeapProfiler();
   ~SamplingHeapProfiler() = delete;
...@@ -96,12 +96,14 @@
                       uint32_t skip_frames);
   void DoRecordFree(void* address);
   void RecordStackTrace(Sample*, uint32_t skip_frames);
-  SamplesMap& EnsureNoRehashingMap();
-  static SamplesMap& samples();
+  static LockFreeAddressHashSet& sampled_addresses_set();
+  void BalanceAddressesHashSet();
 
   base::ThreadLocalBoolean entered_;
   base::Lock mutex_;
-  std::stack<std::unique_ptr<SamplesMap>> sample_maps_;
+  std::stack<std::unique_ptr<LockFreeAddressHashSet>> sampled_addresses_stack_;
+  std::unordered_map<void*, Sample> samples_;
   std::vector<SamplesObserver*> observers_;
   uint32_t last_sample_ordinal_ = 0;
......
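The new BalanceAddressesHashSet() grows the set by copying into a larger one,
atomically publishing the new pointer, and keeping the old sets alive for
readers that already loaded the previous pointer. A condensed sketch of that
scheme follows (illustration only; the std::atomic global and the GrowIfNeeded
name are hypothetical stand-ins for the base/atomicops code in this CL):

#include <atomic>
#include <memory>
#include <stack>

#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"

// Readers load this pointer lock-free on the allocation/free hot path.
std::atomic<base::LockFreeAddressHashSet*> g_set;
// Every set ever published is retained here, so a reader that loaded an older
// pointer can still dereference it safely.
std::stack<std::unique_ptr<base::LockFreeAddressHashSet>> g_all_sets;

void GrowIfNeeded() {  // Writers call this under the profiler lock.
  base::LockFreeAddressHashSet* current =
      g_set.load(std::memory_order_relaxed);
  if (current->load_factor() < 1)
    return;
  auto grown = std::make_unique<base::LockFreeAddressHashSet>(
      current->buckets_count() * 2);
  grown->Copy(*current);
  // Release store: a reader that sees the new pointer also sees the fully
  // copied contents.
  g_set.store(grown.get(), std::memory_order_release);
  g_all_sets.push(std::move(grown));
}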