Commit 26cdeb96 authored by jar@chromium.org's avatar jar@chromium.org

Switch to a simple linked-list for worker thread pool

The goal was to avoid alloc/free during the thread
teardown callback, and this CL provides a very simple
and clear implementation.

r=rtenneti
BUG=104696
Review URL: http://codereview.chromium.org/8597017

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@110856 0039d316-1c4b-4281-b951-d872f2087c98
parent 3e6743f1
...@@ -135,7 +135,7 @@ int ThreadData::incarnation_counter_ = 0; ...@@ -135,7 +135,7 @@ int ThreadData::incarnation_counter_ = 0;
ThreadData* ThreadData::all_thread_data_list_head_ = NULL; ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
// static // static
ThreadData::ThreadDataPool* ThreadData::unregistered_thread_data_pool_ = NULL; ThreadData* ThreadData::first_retired_worker_ = NULL;
// static // static
base::LazyInstance<base::Lock, base::LazyInstance<base::Lock,
...@@ -148,18 +148,20 @@ ThreadData::Status ThreadData::status_ = ThreadData::UNINITIALIZED; ...@@ -148,18 +148,20 @@ ThreadData::Status ThreadData::status_ = ThreadData::UNINITIALIZED;
ThreadData::ThreadData(const std::string& suggested_name) ThreadData::ThreadData(const std::string& suggested_name)
: incarnation_count_for_pool_(-1), : incarnation_count_for_pool_(-1),
next_(NULL), next_(NULL),
next_retired_worker_(NULL),
worker_thread_number_(0) { worker_thread_number_(0) {
DCHECK_GE(suggested_name.size(), 0u); DCHECK_GE(suggested_name.size(), 0u);
thread_name_ = suggested_name; thread_name_ = suggested_name;
PushToHeadOfList(); // Which sets real incarnation_count_for_pool_. PushToHeadOfList(); // Which sets real incarnation_count_for_pool_.
} }
ThreadData::ThreadData(size_t thread_number) ThreadData::ThreadData(int thread_number)
: incarnation_count_for_pool_(-1), : incarnation_count_for_pool_(-1),
next_(NULL), next_(NULL),
next_retired_worker_(NULL),
worker_thread_number_(thread_number) { worker_thread_number_(thread_number) {
CHECK_NE(thread_number, 0u); CHECK_GT(thread_number, 0);
base::StringAppendF(&thread_name_, "WorkerThread-%"PRIuS, thread_number); base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
PushToHeadOfList(); // Which sets real incarnation_count_for_pool_. PushToHeadOfList(); // Which sets real incarnation_count_for_pool_.
} }
...@@ -195,25 +197,22 @@ ThreadData* ThreadData::Get() { ...@@ -195,25 +197,22 @@ ThreadData* ThreadData::Get() {
// We must be a worker thread, since we didn't pre-register. // We must be a worker thread, since we didn't pre-register.
ThreadData* worker_thread_data = NULL; ThreadData* worker_thread_data = NULL;
size_t thread_number = 0; int thread_number = 0;
{ {
base::AutoLock lock(*list_lock_.Pointer()); base::AutoLock lock(*list_lock_.Pointer());
if (!unregistered_thread_data_pool_) if (first_retired_worker_) {
unregistered_thread_data_pool_ = new ThreadDataPool; worker_thread_data = first_retired_worker_;
if (!unregistered_thread_data_pool_->empty()) { first_retired_worker_ = first_retired_worker_->next_retired_worker_;
worker_thread_data = worker_thread_data->next_retired_worker_ = NULL;
const_cast<ThreadData*>(unregistered_thread_data_pool_->top());
unregistered_thread_data_pool_->pop();
} else { } else {
thread_number = ++thread_number_counter_; thread_number = ++thread_number_counter_;
unregistered_thread_data_pool_->reserve(thread_number);
} }
} }
// If we can't find a previously used instance, then we have to create one. // If we can't find a previously used instance, then we have to create one.
if (!worker_thread_data) if (!worker_thread_data)
worker_thread_data = new ThreadData(thread_number); worker_thread_data = new ThreadData(thread_number);
DCHECK_GT(worker_thread_data->worker_thread_number_, 0u); DCHECK_GT(worker_thread_data->worker_thread_number_, 0);
tls_index_.Set(worker_thread_data); tls_index_.Set(worker_thread_data);
return worker_thread_data; return worker_thread_data;
...@@ -228,14 +227,17 @@ void ThreadData::OnThreadTermination(void* thread_data) { ...@@ -228,14 +227,17 @@ void ThreadData::OnThreadTermination(void* thread_data) {
reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup(); reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
} }
void ThreadData::OnThreadTerminationCleanup() const { void ThreadData::OnThreadTerminationCleanup() {
if (!worker_thread_number_) if (!worker_thread_number_)
return; return;
base::AutoLock lock(*list_lock_.Pointer()); base::AutoLock lock(*list_lock_.Pointer());
if (incarnation_counter_ != incarnation_count_for_pool_) if (incarnation_counter_ != incarnation_count_for_pool_)
return; // ThreadData was constructed in an earlier unit test. return; // ThreadData was constructed in an earlier unit test.
// The following will never have to do an allocation. // We must NOT do any allocations during this callback.
unregistered_thread_data_pool_->push(this); // Using the simple linked lists avoids all allocations.
DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
this->next_retired_worker_ = first_retired_worker_;
first_retired_worker_ = this;
} }
// static // static
...@@ -512,14 +514,18 @@ void ThreadData::ShutdownSingleThreadedCleanup(bool leak) { ...@@ -512,14 +514,18 @@ void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
if (!InitializeAndSetTrackingStatus(false)) if (!InitializeAndSetTrackingStatus(false))
return; return;
ThreadData* thread_data_list; ThreadData* thread_data_list;
ThreadDataPool* final_pool;
{ {
base::AutoLock lock(*list_lock_.Pointer()); base::AutoLock lock(*list_lock_.Pointer());
thread_data_list = all_thread_data_list_head_; thread_data_list = all_thread_data_list_head_;
all_thread_data_list_head_ = NULL; all_thread_data_list_head_ = NULL;
final_pool = unregistered_thread_data_pool_;
unregistered_thread_data_pool_ = NULL;
++incarnation_counter_; ++incarnation_counter_;
// To be clean, break apart the retired worker list (though we leak them).
while(first_retired_worker_) {
ThreadData* worker = first_retired_worker_;
CHECK_GT(worker->worker_thread_number_, 0);
first_retired_worker_ = worker->next_retired_worker_;
worker->next_retired_worker_ = NULL;
}
} }
// Put most global static back in pristine shape. // Put most global static back in pristine shape.
...@@ -535,15 +541,6 @@ void ThreadData::ShutdownSingleThreadedCleanup(bool leak) { ...@@ -535,15 +541,6 @@ void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
// When we want to cleanup (on a single thread), here is what we do. // When we want to cleanup (on a single thread), here is what we do.
if (final_pool) {
// The thread_data_list contains *all* the instances, and we'll use it to
// delete them. This pool has pointers to some instances, and we just
// have to drop those pointers (and not do the deletes here).
while (!final_pool->empty())
final_pool->pop();
delete final_pool;
}
// Do actual recursive delete in all ThreadData instances. // Do actual recursive delete in all ThreadData instances.
while (thread_data_list) { while (thread_data_list) {
ThreadData* next_thread_data = thread_data_list; ThreadData* next_thread_data = thread_data_list;
...@@ -558,49 +555,6 @@ void ThreadData::ShutdownSingleThreadedCleanup(bool leak) { ...@@ -558,49 +555,6 @@ void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
} }
} }
//------------------------------------------------------------------------------
// Small partial implementation of a stack that never has to allocate during a
// push() operation, because it is always prepared to accept the maximum number
// of ThreadData instances (all the worker thread related instances).
ThreadData::ThreadDataPool::ThreadDataPool() : empty_slot_(0) {};
ThreadData::ThreadDataPool::~ThreadDataPool() {};
bool ThreadData::ThreadDataPool::empty() const { return empty_slot_ == 0; }
void ThreadData::ThreadDataPool::reserve(size_t largest_worker_pool_number) {
// Worker pool numbers start at 1, and exclude 0, so the number is exactly
// the least size needed.
// Due to asynchronous construction of worker-pool numbers (and associated
// ThreadData), we might not hear about the numbers sequentially.
if (largest_worker_pool_number > stack_.size())
stack_.resize(largest_worker_pool_number);
}
const ThreadData* ThreadData::ThreadDataPool::top() const {
if (empty_slot_ > 0)
return stack_[empty_slot_ - 1];
NOTREACHED();
return NULL;
}
void ThreadData::ThreadDataPool::push(const ThreadData* thread_data) {
if (empty_slot_ < stack_.size()) {
stack_[empty_slot_] = thread_data;
++empty_slot_;
return;
}
NOTREACHED();
}
void ThreadData::ThreadDataPool::pop() {
if (empty_slot_ > 0) {
--empty_slot_;
return;
}
NOTREACHED();
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Individual 3-tuple of birth (place and thread) along with death thread, and // Individual 3-tuple of birth (place and thread) along with death thread, and
// the accumulated stats for instances (DeathData). // the accumulated stats for instances (DeathData).
......
...@@ -137,8 +137,10 @@ ...@@ -137,8 +137,10 @@
// //
// A DataCollector is a container object that holds a set of Snapshots. The // A DataCollector is a container object that holds a set of Snapshots. The
// statistics in a snapshot are gathered asynhcronously relative to their // statistics in a snapshot are gathered asynhcronously relative to their
// ongoing updates. It is possible, though highly unlikely, that stats such // ongoing updates. It is possible, though highly unlikely, that stats could be
// as a 64bit counter could be incorrectly recorded by this process. The // incorrectly recorded by this process (all data is held in 32 bit ints, but we
// are not atomically collecting all data, so we could have count that does not,
// for example, match with the number of durations we accumulated). The
// advantage to having fast (non-atomic) updates of the data outweighs the // advantage to having fast (non-atomic) updates of the data outweighs the
// minimal risk of a singular corrupt statistic snapshot (only the snapshot // minimal risk of a singular corrupt statistic snapshot (only the snapshot
// could be corrupt, not the underlying and ongoing statistic). In constrast, // could be corrupt, not the underlying and ongoing statistic). In constrast,
...@@ -148,23 +150,40 @@ ...@@ -148,23 +150,40 @@
// //
// After an array of Snapshots instances are collected into a DataCollector, // After an array of Snapshots instances are collected into a DataCollector,
// they need to be prepared for displaying our output. We currently implement a // they need to be prepared for displaying our output. We currently implement a
// direct rendering to HTML, but we will soon have a JSON serialization as well. // serialization into a Value hierarchy, which is automatically translated to
// JSON when supplied to rendering Java Scirpt.
// For direct HTML display, the data must be sorted, and possibly aggregated //
// (example: how many threads are in a specific consecutive set of Snapshots? // TODO(jar): We can implement a Snapshot system that *tries* to grab the
// What was the total birth count for that set? etc.). Aggregation instances // snapshots on the source threads *when* they have MessageLoops available
// collect running sums of any set of snapshot instances, and are used to print // (worker threads don't have message loops generally, and hence gathering from
// sub-totals in an about:profiler page. // them will continue to be asynchronous). We had an implementation of this in
// the past, but the difficulty is dealing with message loops being terminated.
// We can *try* to spam the available threads via some message loop proxy to
// achieve this feat, and it *might* be valuable when we are colecting data for
// upload via UMA (where correctness of data may be more significant than for a
// single screen of about:profiler).
//
// TODO(jar): We need to save a single sample in each DeathData instance of the
// times recorded. This sample should be selected in a uniformly random way.
// //
// TODO(jar): I need to store DataCollections, and provide facilities for taking // TODO(jar): We should support (optionally) the recording of parent-child
// the difference between two gathered DataCollections. For now, I'm just // relationships for tasks. This should be done by detecting what tasks are
// adding a hack that Reset()s to zero all counts and stats. This is also // Born during the running of a parent task. The resulting data can be used by
// a smarter profiler to aggregate the cost of a series of child tasks into
// the ancestor task. It can also be used to illuminate what child or parent is
// related to each task.
//
// TODO(jar): We need to store DataCollections, and provide facilities for
// taking the difference between two gathered DataCollections. For now, we're
// just adding a hack that Reset()s to zero all counts and stats. This is also
// done in a slighly thread-unsafe fashion, as the resetting is done // done in a slighly thread-unsafe fashion, as the resetting is done
// asynchronously relative to ongoing updates (but all data is 32 bit in size). // asynchronously relative to ongoing updates (but all data is 32 bit in size).
// For basic profiling, this will work "most of the time," and should be // For basic profiling, this will work "most of the time," and should be
// sufficient... but storing away DataCollections is the "right way" to do this. // sufficient... but storing away DataCollections is the "right way" to do this.
// We'll accomplish this via JavaScript storage of snapshots, and then we'll // We'll accomplish this via JavaScript storage of snapshots, and then we'll
// remove the Reset() methods. // remove the Reset() methods. We may also need a short-term-max value in
// DeathData that is reset (as synchronously as possible) during each snapshot.
// This will facilitate displaying a max value for each snapshot period.
class MessageLoop; class MessageLoop;
...@@ -254,9 +273,6 @@ class BASE_EXPORT DeathData { ...@@ -254,9 +273,6 @@ class BASE_EXPORT DeathData {
// realtime statistics, and only used in snapshots and aggregatinos. // realtime statistics, and only used in snapshots and aggregatinos.
void AddDeathData(const DeathData& other); void AddDeathData(const DeathData& other);
// Simple print of internal state for use in line of HTML.
void WriteHTML(std::string* output) const;
// Construct a DictionaryValue instance containing all our stats. The caller // Construct a DictionaryValue instance containing all our stats. The caller
// assumes ownership of the returned instance. // assumes ownership of the returned instance.
base::DictionaryValue* ToValue() const; base::DictionaryValue* ToValue() const;
...@@ -275,10 +291,6 @@ class BASE_EXPORT DeathData { ...@@ -275,10 +291,6 @@ class BASE_EXPORT DeathData {
DurationInt duration() const { return duration_; } DurationInt duration() const { return duration_; }
DurationInt max() const { return max_; } DurationInt max() const { return max_; }
// Emits HTML formated description of members, assuming |count| instances
// when calculating averages.
void WriteHTML(int count, std::string* output) const;
// Agggegate data into our state. // Agggegate data into our state.
void AddData(const Data& other); void AddData(const Data& other);
void AddDuration(DurationInt duration); void AddDuration(DurationInt duration);
...@@ -486,7 +498,7 @@ class BASE_EXPORT ThreadData { ...@@ -486,7 +498,7 @@ class BASE_EXPORT ThreadData {
ThreadData* next() const { return next_; } ThreadData* next() const { return next_; }
// Using our lock, make a copy of the specified maps. These calls may arrive // Using our lock, make a copy of the specified maps. These calls may arrive
// from non-local threads, and are used to quickly scan data from all threads // from non-local threads, and are used to quickly scan data from all threads
// in order to build an HTML page for about:profiler. // in order to build JSON for about:profiler.
void SnapshotBirthMap(BirthMap *output) const; void SnapshotBirthMap(BirthMap *output) const;
void SnapshotDeathMap(DeathMap *output) const; void SnapshotDeathMap(DeathMap *output) const;
// -------- end of should be private methods. // -------- end of should be private methods.
...@@ -525,30 +537,8 @@ class BASE_EXPORT ThreadData { ...@@ -525,30 +537,8 @@ class BASE_EXPORT ThreadData {
// in production code. // in production code.
friend class TrackedObjectsTest; friend class TrackedObjectsTest;
// Implment a stack that avoids allocations during a push() by having enough
// space ahead of time.
class ThreadDataPool {
public:
ThreadDataPool();
~ThreadDataPool();
// Make sure the stack is large enough to support the indicated number of
// elements.
void reserve(size_t largest_worker_pool_number);
bool empty() const;
const ThreadData* top() const;
void push(const ThreadData* thread_data);
void pop();
private:
std::vector<const ThreadData*> stack_;
size_t empty_slot_;
DISALLOW_COPY_AND_ASSIGN(ThreadDataPool);
};
// Worker thread construction creates a name since there is none. // Worker thread construction creates a name since there is none.
explicit ThreadData(size_t thread_number); explicit ThreadData(int thread_number);
// Message loop based construction should provide a name. // Message loop based construction should provide a name.
explicit ThreadData(const std::string& suggested_name); explicit ThreadData(const std::string& suggested_name);
...@@ -577,7 +567,7 @@ class BASE_EXPORT ThreadData { ...@@ -577,7 +567,7 @@ class BASE_EXPORT ThreadData {
// This method should be called when a worker thread terminates, so that we // This method should be called when a worker thread terminates, so that we
// can save all the thread data into a cache of reusable ThreadData instances. // can save all the thread data into a cache of reusable ThreadData instances.
void OnThreadTerminationCleanup() const; void OnThreadTerminationCleanup();
// Cleans up data structures, and returns statics to near pristine (mostly // Cleans up data structures, and returns statics to near pristine (mostly
// uninitialized) state. If there is any chance that other threads are still // uninitialized) state. If there is any chance that other threads are still
...@@ -593,16 +583,17 @@ class BASE_EXPORT ThreadData { ...@@ -593,16 +583,17 @@ class BASE_EXPORT ThreadData {
// We use thread local store to identify which ThreadData to interact with. // We use thread local store to identify which ThreadData to interact with.
static base::ThreadLocalStorage::Slot tls_index_; static base::ThreadLocalStorage::Slot tls_index_;
// List of ThreadData instances for use with worker threads. When a worker
// thread is done (terminated), we push it onto this llist. When a new worker
// thread is created, we first try to re-use a ThreadData instance from the
// list, and if none are available, construct a new one.
// This is only accessed while list_lock_ is held.
static ThreadData* first_retired_worker_;
// Link to the most recently created instance (starts a null terminated list). // Link to the most recently created instance (starts a null terminated list).
// The list is traversed by about:profiler when it needs to snapshot data. // The list is traversed by about:profiler when it needs to snapshot data.
// This is only accessed while list_lock_ is held. // This is only accessed while list_lock_ is held.
static ThreadData* all_thread_data_list_head_; static ThreadData* all_thread_data_list_head_;
// Set of ThreadData instances for use with worker threads. When a worker
// thread is done (terminating), we push it into this pool. When a new worker
// thread is created, we first try to re-use a ThreadData instance from the
// pool, and if none are available, construct a new one.
// This is only accessed while list_lock_ is held.
static ThreadDataPool* unregistered_thread_data_pool_;
// The next available thread number. This should only be accessed when the // The next available thread number. This should only be accessed when the
// list_lock_ is held. // list_lock_ is held.
static int thread_number_counter_; static int thread_number_counter_;
...@@ -631,6 +622,11 @@ class BASE_EXPORT ThreadData { ...@@ -631,6 +622,11 @@ class BASE_EXPORT ThreadData {
// data). // data).
ThreadData* next_; ThreadData* next_;
// Pointer to another ThreadData instance for a Worker-Thread that has been
// retired (its thread was terminated). This value is non-NULL only for a
// retired ThreadData associated with a Worker-Thread.
ThreadData* next_retired_worker_;
// The name of the thread that is being recorded. If this thread has no // The name of the thread that is being recorded. If this thread has no
// message_loop, then this is a worker thread, with a sequence number postfix. // message_loop, then this is a worker thread, with a sequence number postfix.
std::string thread_name_; std::string thread_name_;
...@@ -639,7 +635,7 @@ class BASE_EXPORT ThreadData { ...@@ -639,7 +635,7 @@ class BASE_EXPORT ThreadData {
// stored in the unregistered_thread_data_pool_ when not in use. // stored in the unregistered_thread_data_pool_ when not in use.
// Value is zero when it is not a worker thread. Value is a positive integer // Value is zero when it is not a worker thread. Value is a positive integer
// corresponding to the created thread name if it is a worker thread. // corresponding to the created thread name if it is a worker thread.
size_t worker_thread_number_; int worker_thread_number_;
// A map used on each thread to keep track of Births on this thread. // A map used on each thread to keep track of Births on this thread.
// This map should only be accessed on the thread it was constructed on. // This map should only be accessed on the thread it was constructed on.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment