Commit 47f939f7 authored by Cheng-Yu Lee's avatar Cheng-Yu Lee Committed by Commit Bot

Report zero memory kills to UMA at the begining of each user session for

easier comparison.

Before this change, MemoryKillsMonitor only reports kill events when it
happens. So it is hard to compare sessions with kill events and those
not.

Before this CL, the monitoring starts when session manager starts (e.g.,
when the login screen is present but not logged in). So simply report a
zero kill event at that time is not ideal because it would bloat the
actual number of user sessions.

In this CL, the monitoring starts when user has logged in. So it
reflects actual user sessions.

Bug=b:62499558
TEST=manual + cheets_LowMemoryKiller

Change-Id: I98cc14e4346e6883dacfe063366c78bc771750cc
Reviewed-on: https://chromium-review.googlesource.com/541200
Commit-Queue: Cheng-Yu Lee <cylee@chromium.org>
Reviewed-by: default avatarDan Erat <derat@chromium.org>
Reviewed-by: default avatarYusuke Sato (in China Mon-Thurs, may be offline) <yusukes@chromium.org>
Cr-Commit-Position: refs/heads/master@{#487587}
parent 6c4cb06a
......@@ -553,10 +553,6 @@ void ChromeBrowserMainPartsChromeos::PreEarlyInitialization() {
chrome::SetChannel(channel);
#endif
// Start monitoring OOM kills.
memory_kills_monitor_ = base::MakeUnique<memory::MemoryKillsMonitor::Handle>(
memory::MemoryKillsMonitor::StartMonitoring());
ChromeBrowserMainPartsLinux::PreEarlyInitialization();
}
......@@ -584,6 +580,9 @@ void ChromeBrowserMainPartsChromeos::PostMainMessageLoopStart() {
dbus_services_.reset(new internal::DBusServices(parameters()));
// Need to be done after LoginState has been initialized in DBusServices().
memory_kills_monitor_ = memory::MemoryKillsMonitor::Initialize();
ChromeBrowserMainPartsLinux::PostMainMessageLoopStart();
}
......
......@@ -16,11 +16,14 @@
#include "base/synchronization/lock.h"
#include "base/threading/simple_thread.h"
#include "base/time/time.h"
#include "chromeos/login/login_state.h"
namespace memory {
// Traces kernel OOM kill events and Low memory kill events (by Chrome
// TabManager).
// TabManager). It starts logging when a user has logged in and stopped until
// the chrome process has ended (usually because of a user log out). Thus it can
// be deemed as a per user session logger.
//
// For OOM kill events, it listens to kernel message (/dev/kmsg) in a blocking
// manner. It runs in a non-joinable thread in order to avoid blocking shutdown.
......@@ -30,47 +33,63 @@ namespace memory {
// For Low memory kills events, chrome calls the single global instance of
// MemoryKillsMonitor synchronously. Note that it would be from a browser thread
// other than the listening thread.
//
// For every events, it reports to UMA and optionally a local file specified by
// --memory-kills-log. The log file is useful if we want to analyze low memory
// kills. If the flag is not given, it won't write to any file.
class MemoryKillsMonitor : public base::DelegateSimpleThread::Delegate {
class MemoryKillsMonitor : public base::DelegateSimpleThread::Delegate,
public chromeos::LoginState::Observer {
public:
// A handle representing the MemoryKillsMonitor's lifetime (the monitor itself
// can't be destroyed per being a non-joinable Thread).
class Handle {
public:
// Constructs a handle that will flag |outer| as shutting down on
// destruction.
explicit Handle(MemoryKillsMonitor* outer);
Handle(Handle&& handle);
~Handle();
private:
MemoryKillsMonitor* outer_;
MemoryKillsMonitor* const outer_;
DISALLOW_COPY_AND_ASSIGN(Handle);
};
// Instantiates the MemoryKillsMonitor instance and starts it. This must only
// be invoked once per process.
static Handle StartMonitoring();
MemoryKillsMonitor();
~MemoryKillsMonitor() override;
// Initializes the global instance, but do not start monitoring until user
// log in. The caller is responsible for deleting the returned handle to
// indicate the end of monitoring.
static std::unique_ptr<Handle> Initialize();
// Logs a low memory kill event.
// A convenient function to log a low memory kill event. It only logs events
// after StartMonitoring() has been called.
static void LogLowMemoryKill(const std::string& type, int estimated_freed_kb);
// Gets the global instance for unit test.
static MemoryKillsMonitor* GetForTesting();
private:
FRIEND_TEST_ALL_PREFIXES(MemoryKillsMonitorTest, TryMatchOomKillLine);
FRIEND_TEST_ALL_PREFIXES(MemoryKillsMonitorTest, TestHistograms);
MemoryKillsMonitor();
~MemoryKillsMonitor() override;
// Try to match a line in kernel message which reports OOM.
static void TryMatchOomKillLine(const std::string& line);
// Overridden from base::DelegateSimpleThread::Delegate:
void Run() override;
// Try to match a line in kernel message which reports OOM.
static void TryMatchOomKillLine(const std::string& line);
// LoginState::Observer overrides.
void LoggedInStateChanged() override;
// Starts a non-joinable thread to monitor OOM kills. This must only
// be invoked once per process.
void StartMonitoring();
// Logs low memory kill event.
void LogLowMemoryKillImpl(const std::string& type, int estimated_freed_kb);
// Logs OOM kill event.
void LogOOMKill(int64_t time_stamp, int oom_badness);
// A flag set when StartMonitoring() is called to indicate that monitoring has
// been started.
base::AtomicFlag monitoring_started_;
// A flag set when MemoryKillsMonitor is shutdown so that its thread can poll
// it and attempt to wind down from that point (to avoid unnecessary work, not
......@@ -81,6 +100,19 @@ class MemoryKillsMonitor : public base::DelegateSimpleThread::Delegate {
// shutdown.
std::unique_ptr<base::DelegateSimpleThread> non_joinable_worker_thread_;
// The last time a low memory kill happens. Accessed from UI thread only.
base::Time last_low_memory_kill_time_;
// The number of low memory kills since monitoring is started. Accessed from
// UI thread only.
int low_memory_kills_count_;
// The last time an OOM kill happens. Accessed from
// |non_joinable_worker_thread_| only.
int64_t last_oom_kill_time_;
// The number of OOM kills since monitoring is started. Accessed from
// |non_joinable_worker_thread_| only.
int oom_kills_count_;
DISALLOW_COPY_AND_ASSIGN(MemoryKillsMonitor);
};
......
......@@ -10,49 +10,103 @@
#include "base/metrics/statistics_recorder.h"
#include "base/time/time.h"
#include "chrome/browser/memory/memory_kills_histogram.h"
#include "content/public/test/test_browser_thread_bundle.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace memory {
using MemoryKillsMonitorTest = testing::Test;
namespace {
base::HistogramBase* GetLowMemoryKillsCountHistogram() {
return base::StatisticsRecorder::FindHistogram("Arc.LowMemoryKiller.Count");
}
base::HistogramBase* GetOOMKillsCountHistogram() {
return base::StatisticsRecorder::FindHistogram("Arc.OOMKills.Count");
}
} // namespace.
class MemoryKillsMonitorTest : public testing::Test {
private:
content::TestBrowserThreadBundle test_browser_thread_bundle_;
};
TEST_F(MemoryKillsMonitorTest, TestHistograms) {
std::unique_ptr<base::StatisticsRecorder> statistic_recorder(
base::StatisticsRecorder::CreateTemporaryForTesting());
MemoryKillsMonitor* g_instance = MemoryKillsMonitor::GetForTesting();
TEST_F(MemoryKillsMonitorTest, LogLowMemoryKill) {
MemoryKillsMonitor::LogLowMemoryKill("APP", 123);
MemoryKillsMonitor::LogLowMemoryKill("APP", 100);
MemoryKillsMonitor::LogLowMemoryKill("TAB", 10000);
auto* histogram_count =
base::StatisticsRecorder::FindHistogram("Arc.LowMemoryKiller.Count");
ASSERT_TRUE(histogram_count);
auto count_samples = histogram_count->SnapshotSamples();
EXPECT_EQ(3, count_samples->TotalCount());
EXPECT_EQ(1, count_samples->GetCount(1));
EXPECT_EQ(1, count_samples->GetCount(2));
EXPECT_EQ(1, count_samples->GetCount(3));
auto* histogram_freed_size =
base::StatisticsRecorder::FindHistogram("Arc.LowMemoryKiller.FreedSize");
ASSERT_TRUE(histogram_freed_size);
auto freed_size_samples = histogram_freed_size->SnapshotSamples();
EXPECT_EQ(3, freed_size_samples->TotalCount());
// 123 and 100 are in the same bucket.
EXPECT_EQ(2, freed_size_samples->GetCount(123));
EXPECT_EQ(2, freed_size_samples->GetCount(100));
EXPECT_EQ(1, freed_size_samples->GetCount(10000));
auto* histogram_time_delta =
base::StatisticsRecorder::FindHistogram("Arc.LowMemoryKiller.TimeDelta");
ASSERT_TRUE(histogram_time_delta);
auto time_delta_samples = histogram_time_delta->SnapshotSamples();
EXPECT_EQ(3, time_delta_samples->TotalCount());
// First time delta is set to kMaxMemoryKillTimeDelta.
EXPECT_EQ(1, time_delta_samples->GetCount(
kMaxMemoryKillTimeDelta.InMilliseconds()));
// Time delta for the other 2 events depends on Now() so we skip testing it
// here.
}
auto* lmk_count_histogram = GetLowMemoryKillsCountHistogram();
auto* oom_count_histogram = GetOOMKillsCountHistogram();
// Before StartMonitoring() is called, nothing is recorded.
ASSERT_FALSE(lmk_count_histogram);
ASSERT_FALSE(oom_count_histogram);
// Start monitoring.
g_instance->StartMonitoring();
lmk_count_histogram = GetLowMemoryKillsCountHistogram();
oom_count_histogram = GetOOMKillsCountHistogram();
ASSERT_TRUE(lmk_count_histogram);
ASSERT_TRUE(oom_count_histogram);
{
auto count_samples = lmk_count_histogram->SnapshotSamples();
EXPECT_EQ(1, count_samples->TotalCount());
EXPECT_EQ(1, count_samples->GetCount(0));
}
{
auto count_samples = oom_count_histogram->SnapshotSamples();
EXPECT_EQ(1, count_samples->TotalCount());
EXPECT_EQ(1, count_samples->GetCount(0));
}
// Low memory kills.
MemoryKillsMonitor::LogLowMemoryKill("APP", 123);
MemoryKillsMonitor::LogLowMemoryKill("APP", 100);
MemoryKillsMonitor::LogLowMemoryKill("TAB", 10000);
lmk_count_histogram = GetLowMemoryKillsCountHistogram();
ASSERT_TRUE(lmk_count_histogram);
{
auto count_samples = lmk_count_histogram->SnapshotSamples();
EXPECT_EQ(4, count_samples->TotalCount());
// The zero count is implicitly added when StartMonitoring() is called.
EXPECT_EQ(1, count_samples->GetCount(0));
EXPECT_EQ(1, count_samples->GetCount(1));
EXPECT_EQ(1, count_samples->GetCount(2));
EXPECT_EQ(1, count_samples->GetCount(3));
}
{
auto* histogram_freed_size = base::StatisticsRecorder::FindHistogram(
"Arc.LowMemoryKiller.FreedSize");
ASSERT_TRUE(histogram_freed_size);
auto freed_size_samples = histogram_freed_size->SnapshotSamples();
EXPECT_EQ(3, freed_size_samples->TotalCount());
// 123 and 100 are in the same bucket.
EXPECT_EQ(2, freed_size_samples->GetCount(123));
EXPECT_EQ(2, freed_size_samples->GetCount(100));
EXPECT_EQ(1, freed_size_samples->GetCount(10000));
}
TEST_F(MemoryKillsMonitorTest, TryMatchOomKillLine) {
{
auto* histogram_time_delta = base::StatisticsRecorder::FindHistogram(
"Arc.LowMemoryKiller.TimeDelta");
ASSERT_TRUE(histogram_time_delta);
auto time_delta_samples = histogram_time_delta->SnapshotSamples();
EXPECT_EQ(3, time_delta_samples->TotalCount());
// First time delta is set to kMaxMemoryKillTimeDelta.
EXPECT_EQ(1, time_delta_samples->GetCount(
kMaxMemoryKillTimeDelta.InMilliseconds()));
// Time delta for the other 2 events depends on Now() so we skip testing it
// here.
}
// OOM kills.
const char* sample_lines[] = {
"3,3429,812967386,-;Out of memory: Kill process 8291 (handle-watcher-) "
"score 674 or sacrifice child",
......@@ -66,34 +120,62 @@ TEST_F(MemoryKillsMonitorTest, TryMatchOomKillLine) {
MemoryKillsMonitor::TryMatchOomKillLine(sample_lines[i]);
}
auto* histogram_count =
base::StatisticsRecorder::FindHistogram("Arc.OOMKills.Count");
ASSERT_TRUE(histogram_count);
auto count_samples = histogram_count->SnapshotSamples();
EXPECT_EQ(3, count_samples->TotalCount());
EXPECT_EQ(1, count_samples->GetCount(1));
EXPECT_EQ(1, count_samples->GetCount(2));
EXPECT_EQ(1, count_samples->GetCount(3));
auto* histogram_score =
base::StatisticsRecorder::FindHistogram("Arc.OOMKills.Score");
ASSERT_TRUE(histogram_score);
auto score_samples = histogram_score->SnapshotSamples();
EXPECT_EQ(3, score_samples->TotalCount());
EXPECT_EQ(1, score_samples->GetCount(674));
EXPECT_EQ(1, score_samples->GetCount(652));
EXPECT_EQ(1, score_samples->GetCount(653));
auto* histogram_time_delta =
base::StatisticsRecorder::FindHistogram("Arc.OOMKills.TimeDelta");
ASSERT_TRUE(histogram_time_delta);
auto time_delta_samples = histogram_time_delta->SnapshotSamples();
EXPECT_EQ(3, time_delta_samples->TotalCount());
// First time delta is set to kMaxMemoryKillTimeDelta.
EXPECT_EQ(1, time_delta_samples->GetCount(
kMaxMemoryKillTimeDelta.InMilliseconds()));
EXPECT_EQ(1, time_delta_samples->GetCount(11));
EXPECT_EQ(1, time_delta_samples->GetCount(13));
oom_count_histogram = GetOOMKillsCountHistogram();
ASSERT_TRUE(oom_count_histogram);
{
auto count_samples = oom_count_histogram->SnapshotSamples();
EXPECT_EQ(4, count_samples->TotalCount());
// The zero count is implicitly added when StartMonitoring() is called.
EXPECT_EQ(1, count_samples->GetCount(0));
EXPECT_EQ(1, count_samples->GetCount(1));
EXPECT_EQ(1, count_samples->GetCount(2));
EXPECT_EQ(1, count_samples->GetCount(3));
}
{
auto* histogram_score =
base::StatisticsRecorder::FindHistogram("Arc.OOMKills.Score");
ASSERT_TRUE(histogram_score);
auto score_samples = histogram_score->SnapshotSamples();
EXPECT_EQ(3, score_samples->TotalCount());
EXPECT_EQ(1, score_samples->GetCount(674));
EXPECT_EQ(1, score_samples->GetCount(652));
EXPECT_EQ(1, score_samples->GetCount(653));
}
{
auto* histogram_time_delta =
base::StatisticsRecorder::FindHistogram("Arc.OOMKills.TimeDelta");
ASSERT_TRUE(histogram_time_delta);
auto time_delta_samples = histogram_time_delta->SnapshotSamples();
EXPECT_EQ(3, time_delta_samples->TotalCount());
// First time delta is set to kMaxMemoryKillTimeDelta.
EXPECT_EQ(1, time_delta_samples->GetCount(
kMaxMemoryKillTimeDelta.InMilliseconds()));
EXPECT_EQ(1, time_delta_samples->GetCount(11));
EXPECT_EQ(1, time_delta_samples->GetCount(13));
}
// Call StartMonitoring multiple times.
base::PlatformThreadId tid1 = g_instance->non_joinable_worker_thread_->tid();
g_instance->StartMonitoring();
base::PlatformThreadId tid2 = g_instance->non_joinable_worker_thread_->tid();
EXPECT_EQ(tid1, tid2);
lmk_count_histogram = GetLowMemoryKillsCountHistogram();
ASSERT_TRUE(lmk_count_histogram);
{
auto count_samples = lmk_count_histogram->SnapshotSamples();
// Ensure zero count is not increased.
EXPECT_EQ(1, count_samples->GetCount(0));
}
oom_count_histogram = GetOOMKillsCountHistogram();
ASSERT_TRUE(oom_count_histogram);
{
auto count_samples = oom_count_histogram->SnapshotSamples();
// Ensure zero count is not increased.
EXPECT_EQ(1, count_samples->GetCount(0));
}
}
} // namespace memory
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment