Commit 2c6f7843 authored by Francois Doray's avatar Francois Doray Committed by Commit Bot

[startup] Avoid large allocations when getting the number of hard faults.

On startup, ::NtQuerySystemInformation() is invoked to get the number
of hard faults and determine if the startup is "warm" or "cold". The
function can fail if the provided buffer is too small. In that case,
we resize the buffer to the size requested by the function and we
retry. In some cases, the size returned by the function is very large
(> 1 MB). It is undesirable to invoke a function to fill a large buffer
just to record startup histograms.

This CL does not get the hard fault count if it requires a buffer with
a size >= 512 KB. The failure to get the hard fault count is reported
in the "undetermined" bucket of the Startup.Temperature histogram.
Histograms suffixed with the startup temperature are obviously not
recorded.

Bug: 1085716
Change-Id: I0864a2af26e07cddcab4e094d344473ccbb5b9a5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2302829Reviewed-by: default avatarGabriel Charette <gab@chromium.org>
Commit-Queue: François Doray <fdoray@chromium.org>
Cr-Commit-Position: refs/heads/master@{#823977}
parent 3323bbd4
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "base/metrics/histogram.h" #include "base/metrics/histogram.h"
#include "base/metrics/histogram_functions.h" #include "base/metrics/histogram_functions.h"
#include "base/notreached.h" #include "base/notreached.h"
#include "base/optional.h"
#include "base/process/process.h" #include "base/process/process.h"
#include "base/strings/string_number_conversions.h" #include "base/strings/string_number_conversions.h"
#include "base/threading/platform_thread.h" #include "base/threading/platform_thread.h"
...@@ -67,11 +68,11 @@ enum StartupTemperature { ...@@ -67,11 +68,11 @@ enum StartupTemperature {
// The startup type couldn't quite be classified as warm or cold, but rather // The startup type couldn't quite be classified as warm or cold, but rather
// was somewhere in between. // was somewhere in between.
LUKEWARM_STARTUP_TEMPERATURE = 2, LUKEWARM_STARTUP_TEMPERATURE = 2,
// Startup temperature wasn't yet determined, or could not be determined.
UNDETERMINED_STARTUP_TEMPERATURE = 3,
// This must be after all meaningful values. All new values should be added // This must be after all meaningful values. All new values should be added
// above this one. // above this one.
STARTUP_TEMPERATURE_COUNT, STARTUP_TEMPERATURE_COUNT,
// Startup temperature wasn't yet determined.
UNDETERMINED_STARTUP_TEMPERATURE
}; };
StartupTemperature g_startup_temperature = UNDETERMINED_STARTUP_TEMPERATURE; StartupTemperature g_startup_temperature = UNDETERMINED_STARTUP_TEMPERATURE;
...@@ -126,17 +127,15 @@ struct SYSTEM_PROCESS_INFORMATION_EX { ...@@ -126,17 +127,15 @@ struct SYSTEM_PROCESS_INFORMATION_EX {
typedef NTSTATUS (WINAPI *NtQuerySystemInformationPtr)( typedef NTSTATUS (WINAPI *NtQuerySystemInformationPtr)(
SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG); SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG);
// Gets the hard fault count of the current process through |hard_fault_count|. // Returns the hard fault count of the current process, or nullopt if it can't
// Returns true on success. // be determined.
bool GetHardFaultCountForCurrentProcess(uint32_t* hard_fault_count) { base::Optional<uint32_t> GetHardFaultCountForCurrentProcess() {
DCHECK(hard_fault_count);
// Get the function pointer. // Get the function pointer.
static const NtQuerySystemInformationPtr query_sys_info = static const NtQuerySystemInformationPtr query_sys_info =
reinterpret_cast<NtQuerySystemInformationPtr>(::GetProcAddress( reinterpret_cast<NtQuerySystemInformationPtr>(::GetProcAddress(
GetModuleHandle(L"ntdll.dll"), "NtQuerySystemInformation")); GetModuleHandle(L"ntdll.dll"), "NtQuerySystemInformation"));
if (query_sys_info == nullptr) if (query_sys_info == nullptr)
return false; return base::nullopt;
// The output of this system call depends on the number of threads and // The output of this system call depends on the number of threads and
// processes on the entire system, and this can change between calls. Retry // processes on the entire system, and this can change between calls. Retry
...@@ -152,12 +151,19 @@ bool GetHardFaultCountForCurrentProcess(uint32_t* hard_fault_count) { ...@@ -152,12 +151,19 @@ bool GetHardFaultCountForCurrentProcess(uint32_t* hard_fault_count) {
static_cast<ULONG>(buffer.size()), &return_length); static_cast<ULONG>(buffer.size()), &return_length);
// Insufficient space in the buffer. // Insufficient space in the buffer.
if (return_length > buffer.size()) { if (return_length > buffer.size()) {
// Abort if a large size is required for the buffer. It is undesirable to
// fill a large buffer just to record histograms.
constexpr ULONG kMaxLength = 512 * 1024;
if (return_length >= kMaxLength)
return base::nullopt;
// Resize the buffer and retry.
buffer.resize(return_length); buffer.resize(return_length);
continue; continue;
} }
if (NT_SUCCESS(status) && return_length <= buffer.size()) if (NT_SUCCESS(status) && return_length <= buffer.size())
break; break;
return false; return base::nullopt;
} }
// Look for the struct housing information for the current process. // Look for the struct housing information for the current process.
...@@ -167,18 +173,16 @@ bool GetHardFaultCountForCurrentProcess(uint32_t* hard_fault_count) { ...@@ -167,18 +173,16 @@ bool GetHardFaultCountForCurrentProcess(uint32_t* hard_fault_count) {
DCHECK_LE(index + sizeof(SYSTEM_PROCESS_INFORMATION_EX), buffer.size()); DCHECK_LE(index + sizeof(SYSTEM_PROCESS_INFORMATION_EX), buffer.size());
SYSTEM_PROCESS_INFORMATION_EX* proc_info = SYSTEM_PROCESS_INFORMATION_EX* proc_info =
reinterpret_cast<SYSTEM_PROCESS_INFORMATION_EX*>(buffer.data() + index); reinterpret_cast<SYSTEM_PROCESS_INFORMATION_EX*>(buffer.data() + index);
if (base::win::HandleToUint32(proc_info->UniqueProcessId) == proc_id) { if (base::win::HandleToUint32(proc_info->UniqueProcessId) == proc_id)
*hard_fault_count = proc_info->HardFaultCount; return proc_info->HardFaultCount;
return true;
}
// The list ends when NextEntryOffset is zero. This also prevents busy // The list ends when NextEntryOffset is zero. This also prevents busy
// looping if the data is in fact invalid. // looping if the data is in fact invalid.
if (proc_info->NextEntryOffset <= 0) if (proc_info->NextEntryOffset <= 0)
return false; return base::nullopt;
index += proc_info->NextEntryOffset; index += proc_info->NextEntryOffset;
} }
return false; return base::nullopt;
} }
#endif // defined(OS_WIN) #endif // defined(OS_WIN)
...@@ -275,28 +279,31 @@ void UmaHistogramAndTraceWithTemperatureAndMaxPressure( ...@@ -275,28 +279,31 @@ void UmaHistogramAndTraceWithTemperatureAndMaxPressure(
// platforms. // platforms.
void RecordHardFaultHistogram() { void RecordHardFaultHistogram() {
#if defined(OS_WIN) #if defined(OS_WIN)
uint32_t hard_fault_count = 0;
// Don't record histograms if unable to get the hard fault count.
if (!GetHardFaultCountForCurrentProcess(&hard_fault_count))
return;
// Hard fault counts are expected to be in the thousands range,
// corresponding to faulting in ~10s of MBs of code ~10s of KBs at a time.
// (Observed to vary from 1000 to 10000 on various test machines and
// platforms.)
base::UmaHistogramCustomCounts(
"Startup.BrowserMessageLoopStartHardFaultCount", hard_fault_count, 1,
40000, 50);
// Determine the startup type based on the number of observed hard faults.
DCHECK_EQ(UNDETERMINED_STARTUP_TEMPERATURE, g_startup_temperature); DCHECK_EQ(UNDETERMINED_STARTUP_TEMPERATURE, g_startup_temperature);
if (hard_fault_count < kWarmStartHardFaultCountThreshold) {
g_startup_temperature = WARM_STARTUP_TEMPERATURE; const base::Optional<uint32_t> hard_fault_count =
} else if (hard_fault_count >= kColdStartHardFaultCountThreshold) { GetHardFaultCountForCurrentProcess();
g_startup_temperature = COLD_STARTUP_TEMPERATURE;
if (hard_fault_count.has_value()) {
// Hard fault counts are expected to be in the thousands range,
// corresponding to faulting in ~10s of MBs of code ~10s of KBs at a time.
// (Observed to vary from 1000 to 10000 on various test machines and
// platforms.)
base::UmaHistogramCustomCounts(
"Startup.BrowserMessageLoopStartHardFaultCount",
hard_fault_count.value(), 1, 40000, 50);
// Determine the startup type based on the number of observed hard faults.
if (hard_fault_count < kWarmStartHardFaultCountThreshold) {
g_startup_temperature = WARM_STARTUP_TEMPERATURE;
} else if (hard_fault_count >= kColdStartHardFaultCountThreshold) {
g_startup_temperature = COLD_STARTUP_TEMPERATURE;
} else {
g_startup_temperature = LUKEWARM_STARTUP_TEMPERATURE;
}
} else { } else {
g_startup_temperature = LUKEWARM_STARTUP_TEMPERATURE; // |g_startup_temperature| remains UNDETERMINED_STARTUP_TEMPERATURE if the
// number of hard faults could not be determined.
} }
// Record the startup 'temperature'. // Record the startup 'temperature'.
......
...@@ -70098,6 +70098,7 @@ https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_2.7.1.pdf ...@@ -70098,6 +70098,7 @@ https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_2.7.1.pdf
<int value="0" label="Cold startup (mostly hard faults)"/> <int value="0" label="Cold startup (mostly hard faults)"/>
<int value="1" label="Warm startup (nearly no hard faults)"/> <int value="1" label="Warm startup (nearly no hard faults)"/>
<int value="2" label="Lukewarm startup (in between cold and warm)"/> <int value="2" label="Lukewarm startup (in between cold and warm)"/>
<int value="3" label="Unable to determine the startup temperature"/>
</enum> </enum>
<enum name="StartupURLsMigration"> <enum name="StartupURLsMigration">
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment