Commit c1834a9a authored by petkov@chromium.org's avatar petkov@chromium.org

Add support for collecting non-Chrome crash stats in Chrome OS.

Extends the external_metrics interface to recognize a new metric kind (crash) which gives a string to describe which kind of crash.  We handle three currently non-Chrome user space crashes, kernel crashes, and unclean system shutdowns.  These are accumulated, but not yet put into xml uploads until the server side is ready.

BUG=chromium-os:9352
TEST=unit_tests and tested in chromeos that log messages were generated corresponding with crashes.

Review URL: http://codereview.chromium.org/6077013
Patch from Ken Mixter <kmixter@chromium.org>.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@72154 0039d316-1c4b-4281-b951-d872f2087c98
parent 0ffba0eb
......@@ -18,7 +18,9 @@
#include "base/metrics/histogram.h"
#include "base/perftimer.h"
#include "base/time.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/browser_thread.h"
#include "chrome/browser/metrics/metrics_service.h"
#include "chrome/browser/metrics/user_metrics.h"
// Steps to add an action.
......@@ -80,6 +82,18 @@ void ExternalMetrics::RecordAction(const char* action) {
NewRunnableMethod(this, &ExternalMetrics::RecordActionUI, action));
}
void ExternalMetrics::RecordCrashUI(const std::string& crash_kind) {
if (g_browser_process && g_browser_process->metrics_service()) {
g_browser_process->metrics_service()->LogChromeOSCrash(crash_kind);
}
}
void ExternalMetrics::RecordCrash(const std::string& crash_kind) {
BrowserThread::PostTask(
BrowserThread::UI, FROM_HERE,
NewRunnableMethod(this, &ExternalMetrics::RecordCrashUI, crash_kind));
}
void ExternalMetrics::RecordHistogram(const char* histogram_data) {
int sample, min, max, nbuckets;
char name[128]; // length must be consistent with sscanf format below.
......@@ -194,6 +208,8 @@ void ExternalMetrics::CollectEvents() {
char* value = reinterpret_cast<char*>(p + 1);
if (test_recorder_ != NULL) {
test_recorder_(name, value);
} else if (strcmp(name, "crash") == 0) {
RecordCrash(value);
} else if (strcmp(name, "histogram") == 0) {
RecordHistogram(value);
} else if (strcmp(name, "linearhistogram") == 0) {
......
......@@ -57,6 +57,13 @@ class ExternalMetrics : public base::RefCountedThreadSafe<ExternalMetrics> {
// Passes an action event to the UMA service.
void RecordAction(const char* action_name);
// Records an external crash of the given string description to
// UMA service on the UI thread.
void RecordCrashUI(const std::string& crash_kind);
// Records an external crash of the given string description.
void RecordCrash(const std::string& crash_kind);
// Passes an histogram event to the UMA service. |histogram_data| is in the
// form <histogram-name> <sample> <min> <max> <buckets_count>.
void RecordHistogram(const char* histogram_data);
......
......@@ -87,6 +87,12 @@ TEST(ExternalMetricsTest, ParseExternalMetricsFile) {
}
}
// Sends a crash message.
int expect_count = nhist;
SendMessage(path, "crash", "user");
external_metrics->CollectEvents();
CheckMessage("crash", "user", ++expect_count);
// Sends a message that's too large.
char b[MAXLENGTH + 100];
for (i = 0; i < MAXLENGTH + 99; i++) {
......@@ -94,35 +100,36 @@ TEST(ExternalMetricsTest, ParseExternalMetricsFile) {
}
b[i] = '\0';
SendMessage(path, b, "yyy");
// Expect logged errors about bad message size.
external_metrics->CollectEvents();
EXPECT_EQ(received_count, nhist);
EXPECT_EQ(expect_count, received_count);
// Sends a malformed message (first string is not null-terminated).
i = 100 + sizeof(i);
int fd = open(path, O_CREAT | O_WRONLY, 0666);
EXPECT_GT(fd, 0);
EXPECT_EQ(write(fd, &i, sizeof(i)), static_cast<int>(sizeof(i)));
EXPECT_EQ(write(fd, b, i), i);
EXPECT_EQ(close(fd), 0);
EXPECT_EQ(static_cast<int>(sizeof(i)), write(fd, &i, sizeof(i)));
EXPECT_EQ(i, write(fd, b, i));
EXPECT_EQ(0, close(fd));
external_metrics->CollectEvents();
EXPECT_EQ(received_count, nhist);
EXPECT_EQ(expect_count, received_count);
// Sends a malformed message (second string is not null-terminated).
b[50] = '\0';
fd = open(path, O_CREAT | O_WRONLY, 0666);
EXPECT_GT(fd, 0);
EXPECT_EQ(write(fd, &i, sizeof(i)), static_cast<int>(sizeof(i)));
EXPECT_EQ(write(fd, b, i), i);
EXPECT_EQ(close(fd), 0);
EXPECT_EQ(static_cast<int>(sizeof(i)), write(fd, &i, sizeof(i)));
EXPECT_EQ(i, write(fd, b, i));
EXPECT_EQ(0, close(fd));
external_metrics->CollectEvents();
EXPECT_EQ(received_count, nhist);
EXPECT_EQ(expect_count, received_count);
// Checks that we survive when file doesn't exist.
EXPECT_EQ(unlink(path), 0);
EXPECT_EQ(0, unlink(path));
external_metrics->CollectEvents();
EXPECT_EQ(received_count, nhist);
EXPECT_EQ(expect_count, received_count);
}
} // namespace chromeos
......@@ -109,10 +109,9 @@ void MetricsLog::RecordIncrementalStabilityElements() {
}
}
void MetricsLog::WriteStabilityElement() {
void MetricsLog::WriteStabilityElement(PrefService* pref) {
DCHECK(!locked_);
PrefService* pref = g_browser_process->local_state();
DCHECK(pref);
// Get stability attributes out of Local State, zeroing out stored values.
......@@ -232,6 +231,38 @@ void MetricsLog::WriteRealtimeStabilityAttributes(PrefService* pref) {
pref->SetInteger(prefs::kStabilityChildProcessCrashCount, 0);
}
#if defined(OS_CHROMEOS)
count = pref->GetInteger(prefs::kStabilityOtherUserCrashCount);
if (count) {
// TODO(kmixter): Write attribute once log server supports it
// and remove warning log.
// WriteIntAttribute("otherusercrashcount", count);
LOG(WARNING) << "Not yet able to send otherusercrashcount="
<< count;
pref->SetInteger(prefs::kStabilityOtherUserCrashCount, 0);
}
count = pref->GetInteger(prefs::kStabilityKernelCrashCount);
if (count) {
// TODO(kmixter): Write attribute once log server supports it
// and remove warning log.
// WriteIntAttribute("kernelcrashcount", count);
LOG(WARNING) << "Not yet able to send kernelcrashcount="
<< count;
pref->SetInteger(prefs::kStabilityKernelCrashCount, 0);
}
count = pref->GetInteger(prefs::kStabilitySystemUncleanShutdownCount);
if (count) {
// TODO(kmixter): Write attribute once log server supports it
// and remove warning log.
// WriteIntAttribute("systemuncleanshutdowns", count);
LOG(WARNING) << "Not yet able to send systemuncleanshutdowns="
<< count;
pref->SetInteger(prefs::kStabilitySystemUncleanShutdownCount, 0);
}
#endif // OS_CHROMEOS
int64 recent_duration = GetIncrementalUptime(pref);
if (recent_duration)
WriteInt64Attribute("uptimesec", recent_duration);
......@@ -277,7 +308,7 @@ void MetricsLog::RecordEnvironment(
WritePluginList(plugin_list);
WriteStabilityElement();
WriteStabilityElement(pref);
{
OPEN_ELEMENT_FOR_SCOPE("cpu");
......
......@@ -64,6 +64,8 @@ class MetricsLog : public MetricsLogBase {
virtual MetricsLog* AsMetricsLog();
private:
FRIEND_TEST_ALL_PREFIXES(MetricsLogTest, ChromeOSStabilityData);
// Returns the date at which the current metrics client ID was created as
// a string containing milliseconds since the epoch, or "0" if none was found.
std::string GetInstallDate() const;
......@@ -71,7 +73,7 @@ class MetricsLog : public MetricsLogBase {
// Writes application stability metrics (as part of the profile log).
// NOTE: Has the side-effect of clearing those counts.
void WriteStabilityElement();
void WriteStabilityElement(PrefService* pref);
// Within stability group, write plugin crash stats.
void WritePluginStabilityElements(PrefService* pref);
......
......@@ -7,6 +7,9 @@
#include "base/string_util.h"
#include "base/time.h"
#include "chrome/browser/metrics/metrics_log.h"
#include "chrome/browser/prefs/pref_service.h"
#include "chrome/common/pref_names.h"
#include "chrome/test/testing_profile.h"
#include "googleurl/src/gurl.h"
#include "testing/gtest/include/gtest/gtest.h"
......@@ -24,10 +27,10 @@ static void NormalizeBuildtime(std::string* xml_encoded) {
std::string prefix = "buildtime=\"";
const char postfix = '\"';
size_t offset = xml_encoded->find(prefix);
ASSERT_GT(offset, 0u);
ASSERT_NE(std::string::npos, offset);
offset += prefix.size();
size_t postfix_position = xml_encoded->find(postfix, offset);
ASSERT_GT(postfix_position, offset);
ASSERT_NE(std::string::npos, postfix_position);
for (size_t i = offset; i < postfix_position; ++i) {
char digit = xml_encoded->at(i);
ASSERT_GE(digit, '0');
......@@ -192,6 +195,52 @@ TEST(MetricsLogTest, ChromeOSLoadEvent) {
ASSERT_EQ(expected_output, encoded);
}
TEST(MetricsLogTest, ChromeOSStabilityData) {
NoTimeMetricsLog log("bogus client ID", 0);
TestingProfile profile;
PrefService* pref = profile.GetPrefs();
pref->SetInteger(prefs::kStabilityChildProcessCrashCount, 10);
pref->SetInteger(prefs::kStabilityOtherUserCrashCount, 11);
pref->SetInteger(prefs::kStabilityKernelCrashCount, 12);
pref->SetInteger(prefs::kStabilitySystemUncleanShutdownCount, 13);
std::string expected_output = StringPrintf(
"<log clientid=\"bogus client ID\" buildtime=\"123456789\" "
"appversion=\"%s\">\n"
"<stability stuff>\n", MetricsLog::GetVersionString().c_str());
// Expect 3 warnings about not yet being able to send the
// Chrome OS stability stats.
log.WriteStabilityElement(profile.GetPrefs());
log.CloseLog();
int size = log.GetEncodedLogSize();
ASSERT_GT(size, 0);
EXPECT_EQ(0, pref->GetInteger(prefs::kStabilityChildProcessCrashCount));
EXPECT_EQ(0, pref->GetInteger(prefs::kStabilityOtherUserCrashCount));
EXPECT_EQ(0, pref->GetInteger(prefs::kStabilityKernelCrashCount));
EXPECT_EQ(0, pref->GetInteger(prefs::kStabilitySystemUncleanShutdownCount));
std::string encoded;
// Leave room for the NUL terminator.
bool encoding_result = log.GetEncodedLog(
WriteInto(&encoded, size + 1), size);
ASSERT_TRUE(encoding_result);
// Check that we can find childprocesscrashcount, but not
// any of the ChromeOS ones that we are not emitting until log
// servers can handle them.
EXPECT_NE(std::string::npos,
encoded.find(" childprocesscrashcount=\"10\""));
EXPECT_EQ(std::string::npos,
encoded.find(" otherusercrashcount="));
EXPECT_EQ(std::string::npos,
encoded.find(" kernelcrashcount="));
EXPECT_EQ(std::string::npos,
encoded.find(" systemuncleanshutdowns="));
}
#endif // OS_CHROMEOS
// Make sure our ID hashes are the same as what we see on the server side.
......
......@@ -400,6 +400,12 @@ void MetricsService::RegisterPrefs(PrefService* local_state) {
0);
local_state->RegisterIntegerPref(prefs::kStabilityDebuggerPresent, 0);
local_state->RegisterIntegerPref(prefs::kStabilityDebuggerNotPresent, 0);
#if defined(OS_CHROMEOS)
local_state->RegisterIntegerPref(prefs::kStabilityOtherUserCrashCount, 0);
local_state->RegisterIntegerPref(prefs::kStabilityKernelCrashCount, 0);
local_state->RegisterIntegerPref(prefs::kStabilitySystemUncleanShutdownCount,
0);
#endif // OS_CHROMEOS
local_state->RegisterDictionaryPref(prefs::kProfileMetrics);
local_state->RegisterIntegerPref(prefs::kNumBookmarksOnBookmarkBar, 0);
......@@ -1715,6 +1721,22 @@ void MetricsService::LogRendererHang() {
IncrementPrefValue(prefs::kStabilityRendererHangCount);
}
#if defined(OS_CHROMEOS)
void MetricsService::LogChromeOSCrash(const std::string &crash_type) {
if (crash_type == "user")
IncrementPrefValue(prefs::kStabilityOtherUserCrashCount);
else if (crash_type == "kernel")
IncrementPrefValue(prefs::kStabilityKernelCrashCount);
else if (crash_type == "uncleanshutdown")
IncrementPrefValue(prefs::kStabilitySystemUncleanShutdownCount);
else
NOTREACHED() << "Unexpected Chrome OS crash type " << crash_type;
// Wake up metrics logs sending if necessary now that new
// log data is available.
HandleIdleSinceLastTransmission(false);
}
#endif // OS_CHROMEOS
void MetricsService::LogChildProcessChange(
NotificationType type,
const NotificationSource& source,
......
......@@ -125,17 +125,12 @@ class MetricsService : public NotificationObserver,
void StoreUnsentLogs();
#if defined(OS_CHROMEOS)
// Returns the hardware class of the Chrome OS device (e.g.,
// hardware qualification ID), or "unknown" if the hardware class is
// not available. The hardware class identifies the configured
// system components such us CPU, WiFi adapter, etc. Note that this
// routine invokes an external utility to determine the hardware
// class.
static std::string GetHardwareClass();
// Start the external metrics service, which collects metrics from Chrome OS
// and passes them to UMA.
void StartExternalMetrics();
// Records a Chrome OS crash.
void LogChromeOSCrash(const std::string &crash_type);
#endif
bool recording_active() const;
......
......@@ -757,6 +757,20 @@ const char kStabilityRendererHangCount[] =
const char kStabilityChildProcessCrashCount[] =
"user_experience_metrics.stability.child_process_crash_count";
// On Chrome OS, total number of non-Chrome user process crashes
// since the last report.
const char kStabilityOtherUserCrashCount[] =
"user_experience_metrics.stability.other_user_crash_count";
// On Chrome OS, total number of kernel crashes since the last report.
const char kStabilityKernelCrashCount[] =
"user_experience_metrics.stability.kernel_crash_count";
// On Chrome OS, total number of unclean system shutdowns since the
// last report.
const char kStabilitySystemUncleanShutdownCount[] =
"user_experience_metrics.stability.system_unclean_shutdowns";
// Number of times the browser has been able to register crash reporting.
const char kStabilityBreakpadRegistrationSuccess[] =
"user_experience_metrics.stability.breakpad_registration_ok";
......
......@@ -261,6 +261,9 @@ extern const char kStabilityLaunchTimeSec[];
extern const char kStabilityLastTimestampSec[];
extern const char kStabilityRendererHangCount[];
extern const char kStabilityChildProcessCrashCount[];
extern const char kStabilityOtherUserCrashCount[];
extern const char kStabilityKernelCrashCount[];
extern const char kStabilitySystemUncleanShutdownCount[];
extern const char kStabilityBreakpadRegistrationSuccess[];
extern const char kStabilityBreakpadRegistrationFail[];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment