Commit e8542484 authored by Eric Seckler's avatar Eric Seckler Committed by Commit Bot

CpuTimeMetrics: Correct for idle times in approx time_in_state metric

As it turns out, the per-core time_in_state data from
/sys/devices/system/cpu/cpuX/cpufreq/stats/time_in_state is wall time
rather than CPU time and includes time the cores spend in idle states.

Consequently, the data we currently report into Power.Approx* metrics
isn't correct. We have to correct for the idle time when computing each
core type + frequency state's proportional part of the execution.

This patch adds a way to do so by polling per-core idle times from
/sys/devices/system/cpu/cpuX/cpuidle/stateY/time (via a helper in
base::CPU) and updates the CpuTimeMetrics code to account for idle time.

Bug: 1081760
Change-Id: I7b8168b47b1e4d118bc7569aebb07952fa78693e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2462088
Commit-Queue: Eric Seckler <eseckler@chromium.org>
Reviewed-by: default avatarDaniel Cheng <dcheng@chromium.org>
Reviewed-by: default avatarBo <boliu@chromium.org>
Reviewed-by: default avatarSami Kyöstilä <skyostil@chromium.org>
Cr-Commit-Position: refs/heads/master@{#816509}
parent f86efafa
......@@ -335,6 +335,8 @@ constexpr char kTimeInStatePath[] =
"/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state";
constexpr char kPhysicalPackageIdPath[] =
"/sys/devices/system/cpu/cpu%d/topology/physical_package_id";
constexpr char kCoreIdleStateTimePath[] =
"/sys/devices/system/cpu/cpu%d/cpuidle/state%d/time";
bool SupportsTimeInState() {
// Reading from time_in_state doesn't block (it amounts to reading a struct
......@@ -388,6 +390,16 @@ bool ParseTimeInState(const std::string& content,
return true;
}
bool SupportsCoreIdleTimes() {
// Reading from the cpuidle driver doesn't block.
ThreadRestrictions::ScopedAllowIO allow_io;
// Check if the path for the idle time in state 0 for core 0 is readable.
FilePath idle_state0_path(
StringPrintf(kCoreIdleStateTimePath, /*core_index=*/0, /*idle_state=*/0));
ScopedFILE file_stream(OpenFile(idle_state0_path, "rb"));
return static_cast<bool>(file_stream);
}
std::vector<CPU::CoreType> GuessCoreTypes() {
// Try to guess the CPU architecture and cores of each cluster by comparing
// the maximum frequencies of the available (online and offline) cores.
......@@ -531,6 +543,46 @@ bool CPU::GetTimeInState(TimeInState& time_in_state) {
return true;
}
// static
bool CPU::GetCumulativeCoreIdleTimes(CoreIdleTimes& idle_times) {
idle_times.clear();
// The kernel may not support the cpufreq-stats driver.
static const bool kSupportsIdleTimes = SupportsCoreIdleTimes();
if (!kSupportsIdleTimes)
return false;
// Reading from the cpuidle driver doesn't block.
ThreadRestrictions::ScopedAllowIO allow_io;
int num_cpus = SysInfo::NumberOfProcessors();
bool success = false;
for (int core_index = 0; core_index < num_cpus; ++core_index) {
std::string content;
TimeDelta idle_time;
// The number of idle states is system/CPU dependent, so we increment and
// try to read each state until we fail.
for (int state_index = 0;; ++state_index) {
auto path = StringPrintf(kCoreIdleStateTimePath, core_index, state_index);
uint64_t idle_state_time = 0;
if (!ReadFileToString(FilePath(path), &content))
break;
StringToUint64(content, &idle_state_time);
idle_time += TimeDelta::FromMicroseconds(idle_state_time);
}
idle_times.push_back(idle_time);
// At least one of the cores should have some idle time, otherwise we report
// a failure.
success |= idle_time > base::TimeDelta();
}
return success;
}
#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID) ||
// defined(OS_AIX)
......
......@@ -98,11 +98,11 @@ class BASE_EXPORT CPU final {
CPU::CoreType core_type; // type of the cores in this cluster.
uint32_t cluster_core_index; // index of the first core in the cluster.
uint64_t core_frequency_khz;
TimeDelta cumulative_cpu_time;
TimeDelta cumulative_time;
};
using TimeInState = std::vector<TimeInStateEntry>;
// Emits the device's cumulative CPU usage split by CPU cluster frequency
// For each CPU core, emits the cumulative time spent in different frequency
// states into the output parameter (replacing its current contents). One
// entry in the output parameter is added for each cluster core index
// + frequency state combination with a non-zero CPU time value. Returns false
......@@ -113,6 +113,17 @@ class BASE_EXPORT CPU final {
// NOTE: Currently only supported on Linux/Android, and only on kernels with
// cpufreq-stats driver.
static bool GetTimeInState(TimeInState&);
// For each CPU core, emits the total cumulative wall time spent in any idle
// state into the output parameter (replacing its current contents). Returns
// false on failure. We return the usage via an output parameter to allow
// reuse of TimeInState's std::vector by the caller, e.g. to avoid allocations
// between repeated calls to this method.
//
// NOTE: Currently only supported on Linux/Android, and only on kernels with
// cpuidle driver.
using CoreIdleTimes = std::vector<TimeDelta>;
static bool GetCumulativeCoreIdleTimes(CoreIdleTimes&);
#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID) ||
// defined(OS_AIX)
......
......@@ -19,6 +19,7 @@
#include "base/metrics/histogram_macros.h"
#include "base/no_destructor.h"
#include "base/process/process_metrics.h"
#include "base/run_loop.h"
#include "base/sequence_checker.h"
#include "base/strings/pattern.h"
#include "base/strings/string_util.h"
......@@ -310,6 +311,9 @@ class ProcessCpuTimeTaskObserver : public base::TaskObserver {
reporting_interval_ = kReportAfterEveryNTasksOtherProcess;
}
DETACH_FROM_SEQUENCE(thread_pool_);
// Post a first collection to capture initial values for calculation of
// delta values in subsequent passes.
PostCollectionTask();
}
// base::TaskObserver implementation:
......@@ -326,6 +330,12 @@ class ProcessCpuTimeTaskObserver : public base::TaskObserver {
return;
task_counter_++;
if (task_counter_ == reporting_interval_) {
PostCollectionTask();
task_counter_ = 0;
}
}
void PostCollectionTask() {
// PostTask() applies a barrier, so this will be applied before the thread
// pool task executes and sets |collection_in_progress_| back to false.
collection_in_progress_.store(true, std::memory_order_relaxed);
......@@ -334,8 +344,6 @@ class ProcessCpuTimeTaskObserver : public base::TaskObserver {
base::BindOnce(
&ProcessCpuTimeTaskObserver::CollectAndReportCpuTimeOnThreadPool,
base::Unretained(this)));
task_counter_ = 0;
}
}
void CollectAndReportCpuTimeOnThreadPool() {
......@@ -362,53 +370,7 @@ class ProcessCpuTimeTaskObserver : public base::TaskObserver {
// kernels. This breakdown approximates Chrome's total per
// core-type/frequency usage by splitting the process's CPU time across
// cores/frequencies according to global per-core time_in_state values.
if (base::CPU::GetTimeInState(time_in_state_)) {
// Compute the total CPU time delta since the last cycle across all
// clusters and frequencies, so that we can compute proportional deltas in
// the second loop below.
base::TimeDelta total_cumulative;
for (const base::CPU::TimeInStateEntry& entry : time_in_state_) {
total_cumulative += entry.cumulative_cpu_time;
}
base::TimeDelta total_delta =
total_cumulative - total_reported_time_in_state_;
total_reported_time_in_state_ = total_cumulative;
if (process_cpu_time_delta > base::TimeDelta() &&
total_delta > base::TimeDelta()) {
for (const base::CPU::TimeInStateEntry& entry : time_in_state_) {
DCHECK_GT(approximate_time_in_state_reporters_.size(),
static_cast<size_t>(entry.core_type));
std::unique_ptr<TimeInStateReporter>& reporter =
approximate_time_in_state_reporters_[static_cast<size_t>(
entry.core_type)];
if (!reporter) {
reporter = std::make_unique<TimeInStateReporter>(
process_type_, entry.core_type, /*is_approximate=*/true);
}
// Compute delta since last cycle per entry.
uint32_t frequency_mhz = entry.core_frequency_khz / 1000;
base::TimeDelta& reported_time =
reported_time_in_state_[std::make_tuple(
entry.core_type, entry.cluster_core_index, frequency_mhz)];
base::TimeDelta time_delta =
entry.cumulative_cpu_time - reported_time;
reported_time = entry.cumulative_cpu_time;
if (time_delta > base::TimeDelta()) {
// Scale the process's cpu time by each cluster/frequency pair's
// relative proportion of execution time. We scale by a double value
// to avoid integer overflow in the presence of large time_delta values.
uint64_t delta_us = process_cpu_time_delta.InMicroseconds() *
(time_delta.InMicroseconds() /
static_cast<double>(total_delta.InMicroseconds()));
reporter->AddMicroseconds(frequency_mhz, delta_us);
}
}
}
}
CollectAndReportApproxTimeInState(process_cpu_time_delta);
// Also report a breakdown by thread type.
base::TimeDelta unattributed_delta = process_cpu_time_delta;
......@@ -506,6 +468,13 @@ class ProcessCpuTimeTaskObserver : public base::TaskObserver {
collection_in_progress_.store(false, std::memory_order_relaxed);
}
void WaitForCollectionForTesting() const {
base::RunLoop run_loop;
// Post the QuitClosure to execute after any pending collection.
task_runner_->PostTask(FROM_HERE, run_loop.QuitClosure());
run_loop.Run();
}
private:
using ClusterFrequency = std::tuple<base::CPU::CoreType,
uint32_t /*cluster_core_index*/,
......@@ -539,6 +508,170 @@ class ProcessCpuTimeTaskObserver : public base::TaskObserver {
return GetThreadTypeFromName(name);
}
void CollectAndReportApproxTimeInState(
base::TimeDelta process_cpu_time_delta) {
if (!base::CPU::GetTimeInState(time_in_state_) || time_in_state_.empty() ||
!base::CPU::GetCumulativeCoreIdleTimes(core_idle_times_)) {
return;
}
if (core_idle_times_.size() > reported_core_idle_times_.size())
reported_core_idle_times_.resize(core_idle_times_.size());
// Compute the wall time delta since the last cycle, so that we can
// compute active times per core type below. cpuidle and time_in_state
// information tick with CLOCK_MONOTONIC, so we use base::TimeTicks (also
// CLOCK_MONOTONIC) as a reference here.
base::TimeTicks now = base::TimeTicks::Now();
base::TimeDelta wall_time_delta = now - last_time_in_state_walltime_;
last_time_in_state_walltime_ = now;
// Convert core_idle_times_ to delta values.
for (uint32_t core_index = 0; core_index < core_idle_times_.size();
++core_index) {
base::TimeDelta absolute_idle_time = core_idle_times_[core_index];
core_idle_times_[core_index] -= reported_core_idle_times_[core_index];
reported_core_idle_times_[core_index] = absolute_idle_time;
}
// Compute total active time during this cycle.
base::TimeDelta total_active_time;
for (base::TimeDelta core_idle_time : core_idle_times_) {
base::TimeDelta active_time = wall_time_delta - core_idle_time;
if (active_time > base::TimeDelta())
total_active_time += active_time;
}
// Because time_in_state_ includes idle time and reports values for a
// single core of each cluster, we work out how much CPU time to attribute
// to each cluster and frequency through the following approximation:
// (1) For each cluster, we compute how much of the execution happened on
// cores of the cluster vs. cores on other clusters
// (current_cluster_proportion).
// (2) We assume that the time spent in idle will be mostly in the lower
// frequency band of the cluster. For that reason, we subtract the
// average idle time of a cluster's core from the cluster's first
// entries (lowest frequencies) in time_in_state.
// (3) For the remaining frequencies, we calculate the proportion of
// active time the cluster spent in each frequency
// (frequency_proportion).
// (4) Finally, we split the process's CPU time across clusters and
// frequencies based on current_cluster_proportion and
// frequency_proportion.
uint32_t last_core_index = -1;
uint32_t next_core_index = -1;
// Idle time of the current cluster that hasn't been attributed to a
// specific frequency state yet, for (2).
base::TimeDelta current_cluster_unattributed_idle_wall_time;
// Average wall time the current cluster's cores were active for, for (3).
base::TimeDelta current_cluster_active_wall_time;
// Proportion of the current cluster's core's cumulative active time of
// the total active time across all cores, for (1).
double current_cluster_proportion = 0;
for (size_t state_index = 0; state_index < time_in_state_.size();
++state_index) {
const base::CPU::TimeInStateEntry& entry = time_in_state_[state_index];
DCHECK_GT(approximate_time_in_state_reporters_.size(),
static_cast<size_t>(entry.core_type));
std::unique_ptr<TimeInStateReporter>& reporter =
approximate_time_in_state_reporters_[static_cast<size_t>(
entry.core_type)];
if (!reporter) {
reporter = std::make_unique<TimeInStateReporter>(
process_type_, entry.core_type, /*is_approximate=*/true);
}
// Compute delta since last cycle per entry.
uint32_t frequency_mhz = entry.core_frequency_khz / 1000;
base::TimeDelta& reported_time = reported_time_in_state_[std::make_tuple(
entry.core_type, entry.cluster_core_index, frequency_mhz)];
base::TimeDelta time_delta = entry.cumulative_time - reported_time;
reported_time = entry.cumulative_time;
// In the first cycle (wall_time_delta == now), we can't really trust
// active_time_per_core (because we don't know the absolute time
// domain of cpuidle values), so skip reporting.
bool first_cycle = wall_time_delta == (now - base::TimeTicks());
if (first_cycle || time_delta <= base::TimeDelta() ||
process_cpu_time_delta <= base::TimeDelta()) {
continue;
}
if (last_core_index != entry.cluster_core_index) {
// This is the first entry for a new cluster. Find the next
// cluster's first core and compute the cluster's active/idle wall
// time (3) and proportion of total execution time (1).
next_core_index = FindNextClusterCoreIndex(state_index);
base::TimeDelta cluster_active_time;
base::TimeDelta cluster_idle_time;
for (size_t core_index = entry.cluster_core_index;
core_index < next_core_index; ++core_index) {
cluster_idle_time += core_idle_times_[core_index];
cluster_active_time += wall_time_delta - core_idle_times_[core_index];
}
size_t num_cores = next_core_index - entry.cluster_core_index;
current_cluster_active_wall_time = cluster_active_time / num_cores;
current_cluster_unattributed_idle_wall_time =
cluster_idle_time / num_cores;
// (1) Proportion of execution on this cluster's cores vs others.
current_cluster_proportion = 0;
if (total_active_time > base::TimeDelta())
current_cluster_proportion = cluster_active_time / total_active_time;
last_core_index = entry.cluster_core_index;
}
// (2) Assign the cluster's idle wall time to the first entries, i.e.
// lowest frequencies.
if (time_delta < current_cluster_unattributed_idle_wall_time) {
// Attribute this frequency state entirely to idle time, skip it.
current_cluster_unattributed_idle_wall_time -= time_delta;
continue;
} else if (current_cluster_unattributed_idle_wall_time >
base::TimeDelta()) {
time_delta -= current_cluster_unattributed_idle_wall_time;
current_cluster_unattributed_idle_wall_time = base::TimeDelta();
}
// (3) Proportion of active wall time that this cluster spent in the
// frequency state.
double frequency_proportion = 0;
if (current_cluster_active_wall_time > base::TimeDelta())
frequency_proportion = time_delta / current_cluster_active_wall_time;
// (4) Scale the process's cpu time by the cluster/frequency pair's
// relative proportion of execution time. Note that we calculate
// double values for the proportions above first to avoid integer
// overflow in the presence of large time_delta values.
uint64_t delta_us = process_cpu_time_delta.InMicroseconds() *
frequency_proportion * current_cluster_proportion;
reporter->AddMicroseconds(frequency_mhz, delta_us);
}
}
// Returns the core index of the first core of the next cluster after the
// cluster of the given entry in |time_in_state_|. Returns max core_index + 1
// if no further clusters exist.
size_t FindNextClusterCoreIndex(size_t state_index) {
for (size_t next_state_index = state_index;
next_state_index < time_in_state_.size(); ++next_state_index) {
const auto& next_entry = time_in_state_[next_state_index];
if (next_entry.cluster_core_index !=
time_in_state_[state_index].cluster_core_index) {
return next_entry.cluster_core_index;
}
}
// No further clusters, return max core index + 1.
return core_idle_times_.size();
}
// Sample CPU time after a certain number of main-thread task to balance
// overhead of sampling and loss at process termination.
static constexpr int kReportAfterEveryNTasksPersistentProcess = 500;
......@@ -566,11 +699,13 @@ class ProcessCpuTimeTaskObserver : public base::TaskObserver {
approximate_time_in_state_reporters_ = {};
base::flat_map<ClusterFrequency, base::TimeDelta /*time_in_state*/>
reported_time_in_state_;
base::TimeDelta total_reported_time_in_state_;
base::CPU::CoreIdleTimes reported_core_idle_times_;
base::TimeTicks last_time_in_state_walltime_;
// Stored as instance variables to avoid allocation churn.
base::ProcessMetrics::CPUUsagePerThread cumulative_thread_times_;
base::ProcessMetrics::TimeInStatePerThread time_in_state_per_thread_;
base::CPU::TimeInState time_in_state_;
base::CPU::CoreIdleTimes core_idle_times_;
// Accessed on both sequences.
std::atomic<bool> collection_in_progress_;
......@@ -589,8 +724,12 @@ void SetupCpuTimeMetrics() {
}
void SampleCpuTimeMetricsForTesting() {
ProcessCpuTimeTaskObserver::GetInstance()
->CollectAndReportCpuTimeOnThreadPool();
auto* instance = ProcessCpuTimeTaskObserver::GetInstance();
// Make sure no collection is currently in progress (this may happen if
// GetInstance() above initializes the task observer).
instance->WaitForCollectionForTesting(); // IN-TEST
instance->PostCollectionTask();
instance->WaitForCollectionForTesting(); // IN-TEST
}
} // namespace content
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment