Fix an implementation bug in poisson sampling allocator.

This CL fixes a longstanding implementation bug in poisson sampling allocator. The poisson sampling allocator consistently oversampled allocations, particularly small ones. The root cause is that muted allocations continue to affect g_accumulated_bytes_tls. If muted allocations were randomly distributed with normal allocations, this would have no net effect. However, muted allocations always happen right after g_accumulated_bytes_tls is reset. This non-randomness means that muted allocations are almost never sampled, but cause other allocations to be over-sampled. This CL fixes it by resetting g_accumulated_bytes_tls back to its original value after we unmute. This CL also fixes a test bug. To pick up the test-specified sampling rate, we make several large allocations, flushing the TLS variables which were still keyed to the old sampling rate. Bug: 843467 Change-Id: Idea9fab3dfadc014375a3428a7839c76fc635556 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1956266 Commit-Queue: Erik Chen <erikchen@chromium.org> Reviewed-by: Etienne Bergeron <etienneb@chromium.org> Auto-Submit: Erik Chen <erikchen@chromium.org> Cr-Commit-Position: refs/heads/master@{#723064}

Fix an implementation bug in poisson sampling allocator.
This CL fixes a longstanding implementation bug in poisson sampling allocator. The poisson sampling allocator consistently oversampled allocations, particularly small ones. The root cause is that muted allocations continue to affect g_accumulated_bytes_tls. If muted allocations were randomly distributed with normal allocations, this would have no net effect. However, muted allocations always happen right after g_accumulated_bytes_tls is reset. This non-randomness means that muted allocations are almost never sampled, but cause other allocations to be over-sampled. This CL fixes it by resetting g_accumulated_bytes_tls back to its original value after we unmute. This CL also fixes a test bug. To pick up the test-specified sampling rate, we make several large allocations, flushing the TLS variables which were still keyed to the old sampling rate. Bug: 843467 Change-Id: Idea9fab3dfadc014375a3428a7839c76fc635556 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1956266 Commit-Queue: Erik Chen <erikchen@chromium.org> Reviewed-by: Etienne Bergeron <etienneb@chromium.org> Auto-Submit: Erik Chen <erikchen@chromium.org> Cr-Commit-Position: refs/heads/master@{#723064}
eb53085d · Erik Chen · Commit Bot · 58e4c193 · eb53085d · eb53085d
Commit eb53085d authored Dec 09, 2019 by Erik Chen Committed by Commit Bot Dec 09, 2019
3 changed files
--- a/base/sampling_heap_profiler/poisson_allocation_sampler.cc
+++ b/base/sampling_heap_profiler/poisson_allocation_sampler.cc
@@ -103,6 +103,11 @@ thread_local bool g_internal_reentry_guard;
 // Accumulated bytes towards sample thread local key.
 thread_local intptr_t g_accumulated_bytes_tls;

+// Used as a workaround to avoid bias from muted samples. See
+// ScopedMuteThreadSamples for more details.
+thread_local intptr_t g_accumulated_bytes_tls_snapshot;
+const intptr_t kAccumulatedBytesOffset = 1 << 29;
+
 // A boolean used to distinguish first allocation on a thread:
 //   false - first allocation on the thread;
 //   true  - otherwise.
@@ -301,11 +306,25 @@ void PartitionFreeHook(void* address) {
 PoissonAllocationSampler::ScopedMuteThreadSamples::ScopedMuteThreadSamples() {
  DCHECK(!g_internal_reentry_guard);
  g_internal_reentry_guard = true;
+
+  // We mute thread samples immediately after taking a sample, which is when we
+  // reset g_accumulated_bytes_tls. This breaks the random sampling requirement
+  // of the poisson process, and causes us to systematically overcount all other
+  // allocations. That's because muted allocations rarely trigger a sample
+  // [which would cause them to be ignored] since they occur right after
+  // g_accumulated_bytes_tls is reset.
+  //
+  // To counteract this, we drop g_accumulated_bytes_tls by a large, fixed
+  // amount to lower the probability that a sample is taken to close to 0. Then
+  // we reset it after we're done muting thread samples.
+  g_accumulated_bytes_tls_snapshot = g_accumulated_bytes_tls;
+  g_accumulated_bytes_tls -= kAccumulatedBytesOffset;
 }

 PoissonAllocationSampler::ScopedMuteThreadSamples::~ScopedMuteThreadSamples() {
  DCHECK(g_internal_reentry_guard);
  g_internal_reentry_guard = false;
+  g_accumulated_bytes_tls = g_accumulated_bytes_tls_snapshot;
 }

 // static
@@ -433,7 +452,6 @@ void PoissonAllocationSampler::DoRecordAlloc(intptr_t accumulated_bytes,
    return;

  size_t mean_interval = g_sampling_interval.load(std::memory_order_relaxed);
-
  if (UNLIKELY(!g_sampling_interval_initialized_tls)) {
    g_sampling_interval_initialized_tls = true;
    // This is the very first allocation on the thread. It always makes it

--- a/chrome/browser/profiling_host/memlog_browsertest.cc
+++ b/chrome/browser/profiling_host/memlog_browsertest.cc
@@ -96,12 +96,9 @@ std::vector<TestParam> GetParams() {
  params.push_back({Mode::kBrowser, mojom::StackMode::NATIVE_WITH_THREAD_NAMES,
                    true /* start_profiling_with_command_line_flag */});

-  // TODO(https://crbug.com/843467): Teporarily disabled due to failures on
-  // bots.
  // Test that we can start profiling without command line flag.
-  // params.push_back({Mode::kBrowser,
-  // mojom::StackMode::NATIVE_WITH_THREAD_NAMES,
-  //                  false /* start_profiling_with_command_line_flag */});
+  params.push_back({Mode::kBrowser, mojom::StackMode::NATIVE_WITH_THREAD_NAMES,
+                    false /* start_profiling_with_command_line_flag */});

  // Test that we can start profiling for the renderer process.
  params.push_back({Mode::kAllRenderers,

--- a/components/heap_profiling/test_driver.cc
+++ b/components/heap_profiling/test_driver.cc
@@ -48,15 +48,15 @@ const char kThreadName[] = "kThreadName";
 //
 // Make some specific allocations in Browser to do a deeper test of the
 // allocation tracking.
-constexpr int kMallocAllocSize = 7907;
+constexpr int kMallocAllocSize = 797;
 constexpr int kMallocAllocCount = 157;

 constexpr int kVariadicAllocCount = 157;

 // The sample rate should not affect the sampled allocations. Intentionally
 // choose an odd number.
-constexpr int kSampleRate = 7777;
-constexpr int kSamplingAllocSize = 1000;
+constexpr int kSampleRate = 777;
+constexpr int kSamplingAllocSize = 100;
 constexpr int kSamplingAllocCount = 1000;
 const char kSamplingAllocTypeName[] = "kSamplingAllocTypeName";

@@ -757,6 +757,16 @@ void TestDriver::MakeTestAllocations() {

  base::PlatformThread::SetName(kThreadName);

+  // Profiling is turned on by default with a sampling interval of 1MB. We reset
+  // the sampling interval, but the change isn't picked by the thread-local
+  // cache until we make a sufficient number of allocations. This is an
+  // implementation limitation, but it's easy to work around by making a couple
+  // of large allocations.
+  constexpr int kFourMegsAllocation = 1 << 22;
+  for (int i = 0; i < 10; ++i) {
+    leaks_.push_back(static_cast<char*>(malloc(kFourMegsAllocation)));
+  }
+
  // In sampling mode, only sampling allocations are relevant.
  if (!IsRecordingAllAllocations()) {
    leaks_.reserve(kSamplingAllocCount);
@@ -791,8 +801,8 @@ void TestDriver::MakeTestAllocations() {
    TRACE_EVENT0(kTestCategory, kVariadicEvent);

    for (int i = 0; i < kVariadicAllocCount; ++i) {
-      leaks_.push_back(new char[i + 8000]);  // Variadic allocation.
-      total_variadic_allocations_ += i + 8000;
+      leaks_.push_back(new char[i + 800]);  // Variadic allocation.
+      total_variadic_allocations_ += i + 800;
    }
  }