Switch to a simple linked-list for worker thread pool

The goal was to avoid alloc/free during the thread teardown callback, and this CL provides a very simple and clear implementation. r=rtenneti BUG=104696 Review URL: http://codereview.chromium.org/8597017 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@110856 0039d316-1c4b-4281-b951-d872f2087c98

Switch to a simple linked-list for worker thread pool
The goal was to avoid alloc/free during the thread teardown callback, and this CL provides a very simple and clear implementation. r=rtenneti BUG=104696 Review URL: http://codereview.chromium.org/8597017 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@110856 0039d316-1c4b-4281-b951-d872f2087c98
26cdeb96 · jar@chromium.org · 3e6743f1 · 26cdeb96 · 26cdeb96
Commit 26cdeb96 authored Nov 20, 2011 by jar@chromium.org
Hide whitespace changes
Inline Side-by-side

Showing with 73 additions and 123 deletions

base/tracked_objects.cc base/tracked_objects.cc +25 -71

base/tracked_objects.h base/tracked_objects.h +48 -52

No files found.
--- a/base/tracked_objects.cc
+++ b/base/tracked_objects.cc
@@ -135,7 +135,7 @@ int ThreadData::incarnation_counter_ = 0;
 ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
 // static
-ThreadData::ThreadDataPool* ThreadData::unregistered_thread_data_pool_ = NULL;
+ThreadData* ThreadData::first_retired_worker_ = NULL;
 // static
 base::LazyInstance<base::Lock,
@@ -148,18 +148,20 @@ ThreadData::Status ThreadData::status_ = ThreadData::UNINITIALIZED;
 ThreadData::ThreadData(const std::string& suggested_name)
    : incarnation_count_for_pool_(-1),
      next_(NULL),
+      next_retired_worker_(NULL),
      worker_thread_number_(0) {
  DCHECK_GE(suggested_name.size(), 0u);
  thread_name_ = suggested_name;
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
 }
-ThreadData::ThreadData(size_t thread_number)
+ThreadData::ThreadData(int thread_number)
    : incarnation_count_for_pool_(-1),
      next_(NULL),
+      next_retired_worker_(NULL),
      worker_thread_number_(thread_number) {
-  CHECK_NE(thread_number, 0u);
+  CHECK_GT(thread_number, 0);
-  base::StringAppendF(&thread_name_, "WorkerThread-%"PRIuS, thread_number);
+  base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
 }
@@ -195,25 +197,22 @@ ThreadData* ThreadData::Get() {
  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data = NULL;
-  size_t thread_number = 0;
+  int thread_number = 0;
  {
    base::AutoLock lock(*list_lock_.Pointer());
-    if (!unregistered_thread_data_pool_)
+    if (first_retired_worker_) {
-      unregistered_thread_data_pool_ = new ThreadDataPool;
+      worker_thread_data = first_retired_worker_;
-    if (!unregistered_thread_data_pool_->empty()) {
+      first_retired_worker_ = first_retired_worker_->next_retired_worker_;
-      worker_thread_data =
+      worker_thread_data->next_retired_worker_ = NULL;
-        const_cast<ThreadData*>(unregistered_thread_data_pool_->top());
-      unregistered_thread_data_pool_->pop();
    } else {
      thread_number = ++thread_number_counter_;
-      unregistered_thread_data_pool_->reserve(thread_number);
    }
  }
  // If we can't find a previously used instance, then we have to create one.
  if (!worker_thread_data)
    worker_thread_data = new ThreadData(thread_number);
-  DCHECK_GT(worker_thread_data->worker_thread_number_, 0u);
+  DCHECK_GT(worker_thread_data->worker_thread_number_, 0);
  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
@@ -228,14 +227,17 @@ void ThreadData::OnThreadTermination(void* thread_data) {
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
 }
-void ThreadData::OnThreadTerminationCleanup() const {
+void ThreadData::OnThreadTerminationCleanup() {
  if (!worker_thread_number_)
    return;
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
-  // The following will never have to do an allocation.
+  // We must NOT do any allocations during this callback.
-  unregistered_thread_data_pool_->push(this);
+  // Using the simple linked lists avoids all allocations.
+  DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
+  this->next_retired_worker_ = first_retired_worker_;
+  first_retired_worker_ = this;
 }
 // static
@@ -512,14 +514,18 @@ void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  if (!InitializeAndSetTrackingStatus(false))
    return;
  ThreadData* thread_data_list;
-  ThreadDataPool* final_pool;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
-    final_pool = unregistered_thread_data_pool_;
-    unregistered_thread_data_pool_ = NULL;
    ++incarnation_counter_;
+    // To be clean, break apart the retired worker list (though we leak them).
+    while(first_retired_worker_) {
+      ThreadData* worker = first_retired_worker_;
+      CHECK_GT(worker->worker_thread_number_, 0);
+      first_retired_worker_ = worker->next_retired_worker_;
+      worker->next_retired_worker_ = NULL;
+    }
  }
  // Put most global static back in pristine shape.
@@ -535,15 +541,6 @@ void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // When we want to cleanup (on a single thread), here is what we do.
-  if (final_pool) {
-    // The thread_data_list contains *all* the instances, and we'll use it to
-    // delete them.  This pool has pointers to some instances, and we just
-    // have to drop those pointers (and not do the deletes here).
-    while (!final_pool->empty())
-      final_pool->pop();
-    delete final_pool;
-  }
  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
@@ -558,49 +555,6 @@ void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  }
 }
-//------------------------------------------------------------------------------
-// Small partial implementation of a stack that never has to allocate during a
-// push() operation, because it is always prepared to accept the maximum number
-// of ThreadData instances (all the worker thread related instances).
-ThreadData::ThreadDataPool::ThreadDataPool() : empty_slot_(0) {};
-ThreadData::ThreadDataPool::~ThreadDataPool() {};
-bool ThreadData::ThreadDataPool::empty() const { return empty_slot_ == 0; }
-void ThreadData::ThreadDataPool::reserve(size_t largest_worker_pool_number) {
-  // Worker pool numbers start at 1, and exclude 0, so the number is exactly
-  // the least size needed.
-  // Due to asynchronous construction of worker-pool numbers (and associated
-  // ThreadData), we might not hear about the numbers sequentially.
-  if (largest_worker_pool_number > stack_.size())
-    stack_.resize(largest_worker_pool_number);
-}
-const ThreadData* ThreadData::ThreadDataPool::top() const {
-  if (empty_slot_ > 0)
-    return stack_[empty_slot_ - 1];
-  NOTREACHED();
-  return NULL;
-}
-void ThreadData::ThreadDataPool::push(const ThreadData* thread_data) {
-  if (empty_slot_ < stack_.size()) {
-    stack_[empty_slot_] = thread_data;
-    ++empty_slot_;
-    return;
-  }
-  NOTREACHED();
-}
-void ThreadData::ThreadDataPool::pop() {
-  if (empty_slot_ > 0) {
-    --empty_slot_;
-    return;
-  }
-  NOTREACHED();
-}
 //------------------------------------------------------------------------------
 // Individual 3-tuple of birth (place and thread) along with death thread, and
 // the accumulated stats for instances (DeathData).

--- a/base/tracked_objects.h
+++ b/base/tracked_objects.h
@@ -137,8 +137,10 @@
 //
 // A DataCollector is a container object that holds a set of Snapshots. The
 // statistics in a snapshot are gathered asynhcronously relative to their
-// ongoing updates.  It is possible, though highly unlikely, that stats such
+// ongoing updates.  It is possible, though highly unlikely, that stats could be
-// as a 64bit counter could be incorrectly recorded by this process.  The
+// incorrectly recorded by this process (all data is held in 32 bit ints, but we
+// are not atomically collecting all data, so we could have count that does not,
+// for example, match with the number of durations we accumulated).  The
 // advantage to having fast (non-atomic) updates of the data outweighs the
 // minimal risk of a singular corrupt statistic snapshot (only the snapshot
 // could be corrupt, not the underlying and ongoing statistic).  In constrast,
@@ -148,23 +150,40 @@
 //
 // After an array of Snapshots instances are collected into a DataCollector,
 // they need to be prepared for displaying our output.  We currently implement a
-// direct rendering to HTML, but we will soon have a JSON serialization as well.
+// serialization into a Value hierarchy, which is automatically translated to
+// JSON when supplied to rendering Java Scirpt.
-// For direct HTML display, the data must be sorted, and possibly aggregated
+//
-// (example: how many threads are in a specific consecutive set of Snapshots?
+// TODO(jar): We can implement a Snapshot system that *tries* to grab the
-// What was the total birth count for that set? etc.).  Aggregation instances
+// snapshots on the source threads *when* they have MessageLoops available
-// collect running sums of any set of snapshot instances, and are used to print
+// (worker threads don't have message loops generally, and hence gathering from
-// sub-totals in an about:profiler page.
+// them will continue to be asynchronous).  We had an implementation of this in
+// the past, but the difficulty is dealing with message loops being terminated.
+// We can *try* to spam the available threads via some message loop proxy to
+// achieve this feat, and it *might* be valuable when we are colecting data for
+// upload via UMA (where correctness of data may be more significant than for a
+// single screen of about:profiler).
+//
+// TODO(jar): We need to save a single sample in each DeathData instance of the
+// times recorded.  This sample should be selected in a uniformly random way.
 //
-// TODO(jar): I need to store DataCollections, and provide facilities for taking
+// TODO(jar): We should support (optionally) the recording of parent-child
-// the difference between two gathered DataCollections.  For now, I'm just
+// relationships for tasks.  This should be done by detecting what tasks are
-// adding a hack that Reset()s to zero all counts and stats.  This is also
+// Born during the running of a parent task.  The resulting data can be used by
+// a smarter profiler to aggregate the cost of a series of child tasks into
+// the ancestor task.  It can also be used to illuminate what child or parent is
+// related to each task.
+//
+// TODO(jar): We need to store DataCollections, and provide facilities for
+// taking the difference between two gathered DataCollections.  For now, we're
+// just adding a hack that Reset()s to zero all counts and stats.  This is also
 // done in a slighly thread-unsafe fashion, as the resetting is done
 // asynchronously relative to ongoing updates (but all data is 32 bit in size).
 // For basic profiling, this will work "most of the time," and should be
 // sufficient... but storing away DataCollections is the "right way" to do this.
 // We'll accomplish this via JavaScript storage of snapshots, and then we'll
-// remove the Reset() methods.
+// remove the Reset() methods.  We may also need a short-term-max value in
+// DeathData that is reset (as synchronously as possible) during each snapshot.
+// This will facilitate displaying a max value for each snapshot period.
 class MessageLoop;
@@ -254,9 +273,6 @@ class BASE_EXPORT DeathData {
  // realtime statistics, and only used in snapshots and aggregatinos.
  void AddDeathData(const DeathData& other);
-  // Simple print of internal state for use in line of HTML.
-  void WriteHTML(std::string* output) const;
  // Construct a DictionaryValue instance containing all our stats. The caller
  // assumes ownership of the returned instance.
  base::DictionaryValue* ToValue() const;
@@ -275,10 +291,6 @@ class BASE_EXPORT DeathData {
    DurationInt duration() const { return duration_; }
    DurationInt max() const { return max_; }
-    // Emits HTML formated description of members, assuming |count| instances
-    // when calculating averages.
-    void WriteHTML(int count, std::string* output) const;
    // Agggegate data into our state.
    void AddData(const Data& other);
    void AddDuration(DurationInt duration);
@@ -486,7 +498,7 @@ class BASE_EXPORT ThreadData {
  ThreadData* next() const { return next_; }
  // Using our lock, make a copy of the specified maps.  These calls may arrive
  // from non-local threads, and are used to quickly scan data from all threads
-  // in order to build an HTML page for about:profiler.
+  // in order to build JSON for about:profiler.
  void SnapshotBirthMap(BirthMap *output) const;
  void SnapshotDeathMap(DeathMap *output) const;
  // -------- end of should be private methods.
@@ -525,30 +537,8 @@ class BASE_EXPORT ThreadData {
  // in production code.
  friend class TrackedObjectsTest;
-  // Implment a stack that avoids allocations during a push() by having enough
-  // space ahead of time.
-  class ThreadDataPool {
-   public:
-    ThreadDataPool();
-    ~ThreadDataPool();
-    // Make sure the stack is large enough to support the indicated number of
-    // elements.
-    void reserve(size_t largest_worker_pool_number);
-    bool empty() const;
-    const ThreadData* top() const;
-    void push(const ThreadData* thread_data);
-    void pop();
-   private:
-    std::vector<const ThreadData*> stack_;
-    size_t empty_slot_;
-    DISALLOW_COPY_AND_ASSIGN(ThreadDataPool);
-  };
  // Worker thread construction creates a name since there is none.
-  explicit ThreadData(size_t thread_number);
+  explicit ThreadData(int thread_number);
  // Message loop based construction should provide a name.
  explicit ThreadData(const std::string& suggested_name);
@@ -577,7 +567,7 @@ class BASE_EXPORT ThreadData {
  // This method should be called when a worker thread terminates, so that we
  // can save all the thread data into a cache of reusable ThreadData instances.
-  void OnThreadTerminationCleanup() const;
+  void OnThreadTerminationCleanup();
  // Cleans up data structures, and returns statics to near pristine (mostly
  // uninitialized) state.  If there is any chance that other threads are still
@@ -593,16 +583,17 @@ class BASE_EXPORT ThreadData {
  // We use thread local store to identify which ThreadData to interact with.
  static base::ThreadLocalStorage::Slot tls_index_;
+  // List of ThreadData instances for use with worker threads. When a worker
+  // thread is done (terminated), we push it onto this llist.  When a new worker
+  // thread is created, we first try to re-use a ThreadData instance from the
+  // list, and if none are available, construct a new one.
+  // This is only accessed while list_lock_ is held.
+  static ThreadData* first_retired_worker_;
  // Link to the most recently created instance (starts a null terminated list).
  // The list is traversed by about:profiler when it needs to snapshot data.
  // This is only accessed while list_lock_ is held.
  static ThreadData* all_thread_data_list_head_;
-  // Set of ThreadData instances for use with worker threads. When a worker
-  // thread is done (terminating), we push it into this pool.  When a new worker
-  // thread is created, we first try to re-use a ThreadData instance from the
-  // pool, and if none are available, construct a new one.
-  // This is only accessed while list_lock_ is held.
-  static ThreadDataPool* unregistered_thread_data_pool_;
  // The next available thread number.  This should only be accessed when the
  // list_lock_ is held.
  static int thread_number_counter_;
@@ -631,6 +622,11 @@ class BASE_EXPORT ThreadData {
  // data).
  ThreadData* next_;
+  // Pointer to another ThreadData instance for a Worker-Thread that has been
+  // retired (its thread was terminated).  This value is non-NULL only for a
+  // retired ThreadData associated with a Worker-Thread.
+  ThreadData* next_retired_worker_;
  // The name of the thread that is being recorded.  If this thread has no
  // message_loop, then this is a worker thread, with a sequence number postfix.
  std::string thread_name_;
@@ -639,7 +635,7 @@ class BASE_EXPORT ThreadData {
  // stored in the unregistered_thread_data_pool_ when not in use.
  // Value is zero when it is not a worker thread.  Value is a positive integer
  // corresponding to the created thread name if it is a worker thread.
-  size_t worker_thread_number_;
+  int worker_thread_number_;
  // A map used on each thread to keep track of Births on this thread.
  // This map should only be accessed on the thread it was constructed on.