Commit 10158843 authored by Kuo-Hsin Yang's avatar Kuo-Hsin Yang Committed by Commit Bot

Remove the thread in MemoryKillsMonitor

Implement the OOM Kills histogram by checking the oom_kill field in
/proc/vmstat instead of monitoring the kernel log.

Bug: 1124182
Change-Id: I8e992933e394104658ac01c4f123ece411f7bc26
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2402778Reviewed-by: default avatarCheng-Yu Lee <cylee@chromium.org>
Reviewed-by: default avatarAchuith Bhandarkar <achuith@chromium.org>
Commit-Queue: Kuo-Hsin Yang <vovoy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#806559}
parent aaa730d0
......@@ -536,7 +536,7 @@ void ChromeBrowserMainPartsChromeos::PostMainMessageLoopStart() {
dbus_services_.reset(new internal::DBusServices(parameters()));
// Need to be done after LoginState has been initialized in DBusServices().
memory_kills_monitor_ = memory::MemoryKillsMonitor::Initialize();
memory::MemoryKillsMonitor::Initialize();
ChromeBrowserMainPartsLinux::PostMainMessageLoopStart();
}
......
......@@ -155,8 +155,6 @@ class ChromeBrowserMainPartsChromeos : public ChromeBrowserMainPartsLinux {
std::unique_ptr<ArcKioskAppManager> arc_kiosk_app_manager_;
std::unique_ptr<WebKioskAppManager> web_kiosk_app_manager_;
std::unique_ptr<::memory::MemoryKillsMonitor::Handle> memory_kills_monitor_;
std::unique_ptr<ChromeKeyboardControllerClient>
chrome_keyboard_controller_client_;
......
......@@ -4,83 +4,36 @@
#include "chrome/browser/memory/memory_kills_monitor.h"
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <fstream>
#include <ios>
#include <string>
#include <vector>
#include "base/bind.h"
#include "base/command_line.h"
#include "base/debug/leak_annotations.h"
#include "base/files/file_util.h"
#include "base/files/scoped_file.h"
#include "base/lazy_instance.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/posix/safe_strerror.h"
#include "base/sequenced_task_runner.h"
#include "base/process/process_metrics.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/synchronization/atomic_flag.h"
#include "base/threading/platform_thread.h"
#include "chrome/browser/memory/memory_kills_histogram.h"
#include "content/public/browser/browser_thread.h"
#include "third_party/re2/src/re2/re2.h"
namespace memory {
using base::TimeDelta;
namespace {
base::LazyInstance<MemoryKillsMonitor>::Leaky g_memory_kills_monitor_instance =
LAZY_INSTANCE_INITIALIZER;
int64_t GetTimestamp(const std::string& line) {
std::vector<std::string> fields = base::SplitString(
line, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
int64_t timestamp = -1;
// Timestamp is the third field in a line of /dev/kmsg.
if (fields.size() < 3 || !base::StringToInt64(fields[2], &timestamp))
return -1;
return timestamp;
}
void LogEvent(const base::Time& time_stamp, const std::string& event) {
VLOG(1) << time_stamp.ToJavaTime() << ", " << event;
}
} // namespace
MemoryKillsMonitor::Handle::Handle(MemoryKillsMonitor* outer) : outer_(outer) {
DCHECK(outer_);
}
MemoryKillsMonitor::Handle::~Handle() {
if (outer_) {
VLOG(2) << "Chrome is shutting down" << outer_;
outer_->is_shutting_down_.Set();
}
}
MemoryKillsMonitor::MemoryKillsMonitor() = default;
MemoryKillsMonitor::~MemoryKillsMonitor() {
// The instance has to be leaked on shutdown as it is referred to by a
// non-joinable thread but ~MemoryKillsMonitor() can't be explicitly deleted
// as it overrides ~SimpleThread(), it should nevertheless never be invoked.
NOTREACHED();
}
// static
std::unique_ptr<MemoryKillsMonitor::Handle> MemoryKillsMonitor::Initialize() {
void MemoryKillsMonitor::Initialize() {
VLOG(2) << "MemoryKillsMonitor::Initializing on "
<< base::PlatformThread::CurrentId();
......@@ -91,11 +44,6 @@ std::unique_ptr<MemoryKillsMonitor::Handle> MemoryKillsMonitor::Initialize() {
login_state->AddObserver(g_memory_kills_monitor_instance.Pointer());
else
LOG(ERROR) << "LoginState is not initialized";
// The MemoryKillsMonitor::Handle will notify the MemoryKillsMonitor
// when it is destroyed so that the underlying thread can at a minimum not
// do extra work during shutdown.
return std::make_unique<Handle>(g_memory_kills_monitor_instance.Pointer());
}
// static
......@@ -107,48 +55,6 @@ void MemoryKillsMonitor::LogLowMemoryKill(
type, estimated_freed_kb);
}
// static
void MemoryKillsMonitor::TryMatchOomKillLine(const std::string& line) {
// Sample OOM log line:
// 3,1362,97646497541,-;Out of memory: Kill process 29582 (android.vending)
// score 961 or sacrifice child.
// Precompile the regex object since the pattern is constant.
static const LazyRE2 kOomKillPattern = {
R"(Out of memory: Kill process .* score \d+)"};
if (RE2::PartialMatch(line, *kOomKillPattern)) {
g_memory_kills_monitor_instance.Get().LogOOMKill();
}
}
// TODO(cylee): Consider adding a unit test for this fuction.
void MemoryKillsMonitor::Run() {
VLOG(2) << "Started monitoring OOM kills on thread "
<< base::PlatformThread::CurrentId();
std::ifstream kmsg_stream("/dev/kmsg", std::ifstream::in);
if (kmsg_stream.fail()) {
LOG(WARNING) << "Open /dev/kmsg failed: " << base::safe_strerror(errno);
return;
}
// Skip kernel messages prior to the instantiation of this object to avoid
// double reporting.
// Note: there's a small gap between login the fseek here, and events in that
// period will not be recorded.
kmsg_stream.seekg(0, std::ios_base::end);
std::string line;
while (std::getline(kmsg_stream, line)) {
if (is_shutting_down_.IsSet()) {
// Not guaranteed to execute when the process is shutting down,
// because the thread might be blocked in fgets().
VLOG(1) << "Chrome is shutting down, MemoryKillsMonitor exits.";
break;
}
TryMatchOomKillLine(line);
}
}
void MemoryKillsMonitor::LoggedInStateChanged() {
VLOG(2) << "LoggedInStateChanged";
auto* login_state = chromeos::LoginState::Get();
......@@ -177,12 +83,46 @@ void MemoryKillsMonitor::StartMonitoring() {
UMA_HISTOGRAM_CUSTOM_COUNTS("Arc.OOMKills.Count", 0, 1, 1000, 1001);
UMA_HISTOGRAM_CUSTOM_COUNTS("Arc.LowMemoryKiller.Count", 0, 1, 1000, 1001);
base::SimpleThread::Options non_joinable_options;
non_joinable_options.joinable = false;
non_joinable_worker_thread_ = std::make_unique<base::DelegateSimpleThread>(
this, "memory_kills_monitor", non_joinable_options);
non_joinable_worker_thread_->StartAsync();
monitoring_started_.Set();
base::VmStatInfo vmstat;
if (base::GetVmStatInfo(&vmstat)) {
last_oom_kills_count_ = vmstat.oom_kill;
} else {
last_oom_kills_count_ = 0;
}
checking_timer_.Start(FROM_HERE, base::TimeDelta::FromSeconds(1),
base::BindRepeating(&MemoryKillsMonitor::CheckOOMKill,
base::Unretained(this)));
}
void MemoryKillsMonitor::CheckOOMKill() {
base::VmStatInfo vmstat;
if (base::GetVmStatInfo(&vmstat)) {
CheckOOMKillImpl(vmstat.oom_kill);
}
}
void MemoryKillsMonitor::CheckOOMKillImpl(unsigned long current_oom_kills) {
DCHECK(monitoring_started_.IsSet());
unsigned long oom_kills_increased = current_oom_kills - last_oom_kills_count_;
if (oom_kills_increased == 0)
return;
VLOG(1) << "OOM_KILLS " << oom_kills_increased << " times";
for (int i = 0; i < oom_kills_increased; ++i) {
++oom_kills_count_;
// Report the cumulative count of killed process in one login session. For
// example if there are 3 processes killed, it would report 1 for the first
// kill, 2 for the second kill, then 3 for the final kill.
UMA_HISTOGRAM_CUSTOM_COUNTS("Arc.OOMKills.Count", oom_kills_count_, 1, 1000,
1001);
}
last_oom_kills_count_ = current_oom_kills;
}
void MemoryKillsMonitor::LogLowMemoryKillImpl(const std::string& type,
......@@ -195,12 +135,12 @@ void MemoryKillsMonitor::LogLowMemoryKillImpl(const std::string& type,
return;
}
base::Time now = base::Time::Now();
LogEvent(now, "LOW_MEMORY_KILL_" + type);
VLOG(1) << "LOW_MEMORY_KILL_" << type;
const TimeDelta time_delta = last_low_memory_kill_time_.is_null()
? kMaxMemoryKillTimeDelta
: (now - last_low_memory_kill_time_);
base::Time now = base::Time::Now();
const base::TimeDelta time_delta = last_low_memory_kill_time_.is_null()
? kMaxMemoryKillTimeDelta
: (now - last_low_memory_kill_time_);
UMA_HISTOGRAM_MEMORY_KILL_TIME_INTERVAL("Arc.LowMemoryKiller.TimeDelta",
time_delta);
last_low_memory_kill_time_ = now;
......@@ -212,33 +152,6 @@ void MemoryKillsMonitor::LogLowMemoryKillImpl(const std::string& type,
UMA_HISTOGRAM_MEMORY_KB("Arc.LowMemoryKiller.FreedSize", estimated_freed_kb);
}
void MemoryKillsMonitor::LogOOMKill() {
if (!monitoring_started_.IsSet()) {
LOG(WARNING) << "LogOOMKill before monitoring started, "
"skipped this log.";
return;
}
// Ideally the timestamp should be parsed from /dev/kmsg, but the timestamp
// there is the elapsed time since system boot. So the timestamp |now| used
// here is a bit delayed.
base::Time now = base::Time::Now();
LogEvent(now, "OOM_KILL");
++oom_kills_count_;
// Report the cumulative count of killed process in one login session.
// For example if there are 3 processes killed, it would report 1 for the
// first kill, 2 for the second kill, then 3 for the final kill.
// It doesn't report a final count at the end of a user session because
// the code runs in a dedicated thread and never ends until browser shutdown
// (or logout on Chrome OS). And on browser shutdown the thread may be
// terminated brutally so there's no chance to execute a "final" block.
// More specifically, code outside the main loop of MemoryKillsMonitor::Run()
// are not guaranteed to be executed.
UMA_HISTOGRAM_CUSTOM_COUNTS("Arc.OOMKills.Count", oom_kills_count_, 1, 1000,
1001);
}
MemoryKillsMonitor* MemoryKillsMonitor::GetForTesting() {
return g_memory_kills_monitor_instance.Pointer();
}
......
......@@ -5,17 +5,11 @@
#ifndef CHROME_BROWSER_MEMORY_MEMORY_KILLS_MONITOR_H_
#define CHROME_BROWSER_MEMORY_MEMORY_KILLS_MONITOR_H_
#include <memory>
#include <string>
#include "base/files/scoped_file.h"
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "base/synchronization/atomic_flag.h"
#include "base/synchronization/lock.h"
#include "base/threading/simple_thread.h"
#include "base/time/time.h"
#include "base/timer/timer.h"
#include "chromeos/login/login_state/login_state.h"
namespace memory {
......@@ -25,93 +19,64 @@ namespace memory {
// the chrome process has ended (usually because of a user log out). Thus it can
// be deemed as a per user session logger.
//
// For OOM kill events, it listens to kernel message (/dev/kmsg) in a blocking
// manner. It runs in a non-joinable thread in order to avoid blocking shutdown.
// There should be only one MemoryKillsMonitor instance globally at any given
// time, otherwise UMA would receive duplicate events.
// For OOM kill events, it checks the oom_kill field in /proc/vmstat
// periodically. There should be only one MemoryKillsMonitor instance globally
// at any given time, otherwise UMA would receive duplicate events.
//
// For Low memory kills events, chrome calls the single global instance of
// MemoryKillsMonitor synchronously. Note that it would be from a browser thread
// other than the listening thread.
class MemoryKillsMonitor : public base::DelegateSimpleThread::Delegate,
public chromeos::LoginState::Observer {
// MemoryKillsMonitor synchronously. Note that it must be called on the browser
// UI thread.
class MemoryKillsMonitor : public chromeos::LoginState::Observer {
public:
class Handle {
public:
// Constructs a handle that will flag |outer| as shutting down on
// destruction.
explicit Handle(MemoryKillsMonitor* outer);
~Handle();
private:
MemoryKillsMonitor* const outer_;
DISALLOW_COPY_AND_ASSIGN(Handle);
};
MemoryKillsMonitor();
~MemoryKillsMonitor() override;
// Initializes the global instance, but do not start monitoring until user
// log in. The caller is responsible for deleting the returned handle to
// indicate the end of monitoring.
static std::unique_ptr<Handle> Initialize();
// log in.
static void Initialize();
// A convenient function to log a low memory kill event. It only logs events
// after StartMonitoring() has been called.
static void LogLowMemoryKill(const std::string& type, int estimated_freed_kb);
// Gets the global instance for unit test.
static MemoryKillsMonitor* GetForTesting();
private:
FRIEND_TEST_ALL_PREFIXES(MemoryKillsMonitorTest, TestHistograms);
// Try to match a line in kernel message which reports OOM.
static void TryMatchOomKillLine(const std::string& line);
// Overridden from base::DelegateSimpleThread::Delegate:
void Run() override;
// Gets the global instance for unit test.
static MemoryKillsMonitor* GetForTesting();
// LoginState::Observer overrides.
void LoggedInStateChanged() override;
// Starts a non-joinable thread to monitor OOM kills. This must only
// be invoked once per process.
// Starts monitoring OOM kills.
void StartMonitoring();
// Logs low memory kill event.
void LogLowMemoryKillImpl(const std::string& type, int estimated_freed_kb);
// Logs OOM kill event.
void LogOOMKill();
// Checks system OOM count.
void CheckOOMKill();
// Split CheckOOMKill and CheckOOMKillImpl for testing.
void CheckOOMKillImpl(unsigned long current_oom_kills);
// A flag set when StartMonitoring() is called to indicate that monitoring has
// been started.
base::AtomicFlag monitoring_started_;
// A flag set when MemoryKillsMonitor is shutdown so that its thread can poll
// it and attempt to wind down from that point (to avoid unnecessary work, not
// because it blocks shutdown).
base::AtomicFlag is_shutting_down_;
// The underlying worker thread which is non-joinable to avoid blocking
// shutdown.
std::unique_ptr<base::DelegateSimpleThread> non_joinable_worker_thread_;
// The last time a low memory kill happens. Accessed from UI thread only.
base::Time last_low_memory_kill_time_;
// The number of low memory kills since monitoring is started. Accessed from
// UI thread only.
int low_memory_kills_count_ = 0;
// The last time an OOM kill happens. Accessed from
// |non_joinable_worker_thread_| only.
int64_t last_oom_kill_time_ = -1;
// The number of OOM kills since monitoring is started. Accessed from
// |non_joinable_worker_thread_| only.
int oom_kills_count_ = 0;
// The number of OOM kills since monitoring is started.
unsigned long oom_kills_count_ = 0;
// The last oom kills count from |GetCurrentOOMKills|.
unsigned long last_oom_kills_count_ = 0;
base::RepeatingTimer checking_timer_;
DISALLOW_COPY_AND_ASSIGN(MemoryKillsMonitor);
};
......
......@@ -108,18 +108,9 @@ TEST_F(MemoryKillsMonitorTest, TestHistograms) {
}
// OOM kills.
const char* sample_lines[] = {
"3,3429,812967386,-;Out of memory: Kill process 8291 (handle-watcher-) "
"score 674 or sacrifice child",
"3,3431,812981331,-;Out of memory: Kill process 8271 (.gms.persistent) "
"score 652 or sacrifice child",
"3,3433,812993014,-;Out of memory: Kill process 9210 (lowpool[11]) "
"score 653 or sacrifice child"
};
for (unsigned long i = 0; i < base::size(sample_lines); ++i) {
MemoryKillsMonitor::TryMatchOomKillLine(sample_lines[i]);
}
// Simulate getting 3 more oom kills.
g_memory_kills_monitor_unittest_instance->CheckOOMKillImpl(
g_memory_kills_monitor_unittest_instance->last_oom_kills_count_ + 3);
oom_count_histogram = GetOOMKillsCountHistogram();
ASSERT_TRUE(oom_count_histogram);
......@@ -133,15 +124,6 @@ TEST_F(MemoryKillsMonitorTest, TestHistograms) {
EXPECT_EQ(1, count_samples->GetCount(3));
}
// Call StartMonitoring multiple times.
base::DelegateSimpleThread* thread1 = g_memory_kills_monitor_unittest_instance
->non_joinable_worker_thread_.get();
EXPECT_NE(nullptr, thread1);
g_memory_kills_monitor_unittest_instance->StartMonitoring();
base::DelegateSimpleThread* thread2 = g_memory_kills_monitor_unittest_instance
->non_joinable_worker_thread_.get();
EXPECT_EQ(thread1, thread2);
lmk_count_histogram = GetLowMemoryKillsCountHistogram();
ASSERT_TRUE(lmk_count_histogram);
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment