Commit 3b27ed93 authored by tby's avatar tby Committed by Commit Bot

[Structured metrics] Add support for projects in core hashing logic.

This CL takes the XML configuration for projects, and uses it to change
hashing behaviour for events.

The main changes are as follows.

 - Some minor renaming in the codegen: key_name_hash is now key_name. I
   realized having "hash" in the name was ambiguous: it could refer to
   either the hash of the key's name (correct) or the name hash of the
   key (dangerous!)

 - The kEventNameHashes list has been renamed kKeyNames, and now
   includes all and only name hashes referring to keys used for hashing.

 - KeyData now no longer works with 'events', it works with 'key_names',
   which refer to either an event or project name hash.

 - StructuredMetricsProvider now looks up an event's key_name, rather
   than event_name_hash, and uses that for hashing.

 - SMP also now computes the user event ID on OnRecord and saves it with
   the hashed data, rather than on ProvideCurrentSessionData. I don't
   know why this wasn't already the case!

This CL minimally modifies the existing unit tests, and a follow-up will
add extra tests covering more edge cases of projects.

Bug: 1016655
Change-Id: I83c5add6c281b48201ebdb94f60aaaad88ce39b4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2115859Reviewed-by: default avatarCharles . <charleszhao@chromium.org>
Commit-Queue: Tony Yeoman <tby@chromium.org>
Cr-Commit-Position: refs/heads/master@{#754371}
parent c5f0defc
......@@ -9,8 +9,9 @@
namespace metrics {
namespace structured {
EventBase::EventBase(uint64_t event_name_hash)
: event_name_hash_(event_name_hash) {}
EventBase::EventBase(uint64_t event_name_hash, uint64_t project_name_hash)
: event_name_hash_(event_name_hash),
project_name_hash_(project_name_hash) {}
EventBase::EventBase(const EventBase& other) = default;
EventBase::~EventBase() = default;
......
......@@ -54,8 +54,10 @@ class EventBase {
uint64_t name_hash() const { return event_name_hash_; }
uint64_t project_name_hash() const { return project_name_hash_; }
protected:
explicit EventBase(uint64_t event_name_hash);
explicit EventBase(uint64_t event_name_hash, uint64_t project_name_hash);
void AddStringMetric(uint64_t name_hash, const std::string& value);
......@@ -65,6 +67,20 @@ class EventBase {
// First 8 bytes of the MD5 hash of the event name, as defined in
// structured.xml. This is calculated by tools/metrics/structured/codegen.py.
uint64_t event_name_hash_;
// The project name hash is used to to determine which key to use for hashing
// events. The project name comes from this event's definition in
// structured.xml, and is decided by the rules:
//
// - if this event references a project, eg. <event name="..."
// project="...">, use that project's name.
//
// - otherwise, use the event's name.
//
// |project_name_hash_| is the first 8 bytes of the MD5 hash of the project
// name.
uint64_t project_name_hash_;
std::vector<Metric> metrics_;
};
......
......@@ -63,34 +63,35 @@ KeyData::KeyData(JsonPrefStore* key_store) : key_store_(key_store) {
KeyData::~KeyData() = default;
base::Optional<std::string> KeyData::ValidateAndGetKey(const uint64_t event) {
base::Optional<std::string> KeyData::ValidateAndGetKey(
const uint64_t project_name_hash) {
DCHECK(key_store_);
const int now = (base::Time::Now() - base::Time::UnixEpoch()).InDays();
bool key_is_valid = true;
// If the key for |key_path| doesn't exist, initialize new key data. Set the
// last rotation to a uniformly selected day between today and
// If the key for |project_name_hash| doesn't exist, initialize new key data.
// Set the last rotation to a uniformly selected day between today and
// |kDefaultRotationPeriod| days ago, to uniformly distribute users amongst
// rotation cohorts.
if (!key_store_->GetValue(KeyPath(event), nullptr)) {
if (!key_store_->GetValue(KeyPath(project_name_hash), nullptr)) {
const int rotation_seed = base::RandInt(0, kDefaultRotationPeriod - 1);
SetRotationPeriod(event, kDefaultRotationPeriod);
SetLastRotation(event, now - rotation_seed);
SetKey(event, GenerateKey());
SetRotationPeriod(project_name_hash, kDefaultRotationPeriod);
SetLastRotation(project_name_hash, now - rotation_seed);
SetKey(project_name_hash, GenerateKey());
LogKeyValidation(KeyValidationState::kCreated);
key_is_valid = false;
}
// If the key for |event| is outdated, generate a new key and write it to
// the |keys| pref store along with updated rotation data. Update the last
// rotation such that the user stays in the same cohort.
const int rotation_period = GetRotationPeriod(event);
const int last_rotation = GetLastRotation(event);
// If the key for |project_name_hash| is outdated, generate a new key and
// write it to the |keys| pref store along with updated rotation data. Update
// the last rotation such that the user stays in the same cohort.
const int rotation_period = GetRotationPeriod(project_name_hash);
const int last_rotation = GetLastRotation(project_name_hash);
if (now - last_rotation > rotation_period) {
const int new_last_rotation = now - (now - last_rotation) % rotation_period;
SetLastRotation(event, new_last_rotation);
SetKey(event, GenerateKey());
SetLastRotation(project_name_hash, new_last_rotation);
SetKey(project_name_hash, GenerateKey());
LogKeyValidation(KeyValidationState::kRotated);
key_is_valid = false;
......@@ -101,7 +102,7 @@ base::Optional<std::string> KeyData::ValidateAndGetKey(const uint64_t event) {
}
const base::Value* key_json;
if (!(key_store_->GetValue(KeyPath(event), &key_json) &&
if (!(key_store_->GetValue(KeyPath(project_name_hash), &key_json) &&
key_json->is_string())) {
LogInternalError(StructuredMetricsError::kMissingKey);
return base::nullopt;
......@@ -117,8 +118,9 @@ base::Optional<std::string> KeyData::ValidateAndGetKey(const uint64_t event) {
}
void KeyData::ValidateKeys() {
for (const uint64_t event : metrics::structured::events::kEventNameHashes) {
ValidateAndGetKey(event);
for (const uint64_t project_name_hash :
metrics::structured::events::kProjectNameHashes) {
ValidateAndGetKey(project_name_hash);
}
}
......@@ -135,8 +137,8 @@ void KeyData::SetRotationPeriod(const uint64_t event,
WriteablePrefStore::DEFAULT_PREF_WRITE_FLAGS);
}
void KeyData::SetKey(const uint64_t event, const std::string& key) {
return key_store_->SetValue(KeyPath(event),
void KeyData::SetKey(const uint64_t project_name_hash, const std::string& key) {
return key_store_->SetValue(KeyPath(project_name_hash),
std::make_unique<base::Value>(key),
WriteablePrefStore::DEFAULT_PREF_WRITE_FLAGS);
}
......@@ -163,9 +165,9 @@ int KeyData::GetRotationPeriod(const uint64_t event) {
return value->GetInt();
}
uint64_t KeyData::UserEventId(const uint64_t event) {
uint64_t KeyData::UserEventId(const uint64_t project_name_hash) {
// Retrieve the key for |event|.
const base::Optional<std::string> key = ValidateAndGetKey(event);
const base::Optional<std::string> key = ValidateAndGetKey(project_name_hash);
if (!key) {
NOTREACHED();
return 0u;
......@@ -177,11 +179,11 @@ uint64_t KeyData::UserEventId(const uint64_t event) {
return hash;
}
uint64_t KeyData::HashForEventMetric(const uint64_t event,
uint64_t KeyData::HashForEventMetric(const uint64_t project_name_hash,
const uint64_t metric,
const std::string& value) {
// Retrieve the key for |event|.
const base::Optional<std::string> key = ValidateAndGetKey(event);
// Retrieve the key for |project_name_hash|.
const base::Optional<std::string> key = ValidateAndGetKey(project_name_hash);
if (!key) {
NOTREACHED();
return 0u;
......
......@@ -55,30 +55,30 @@ class KeyData {
KeyData(const KeyData&) = delete;
KeyData& operator=(const KeyData&) = delete;
// Returns a digest of |value| for |metric| in the context of |event|.
// Terminology: a metric is a (name, value) pair, and an event is a bundle of
// metrics.
// Returns a digest of |value| for |metric| in the context of
// |project_name_hash|. Terminology: a metric is a (name, value) pair, and an
// event is a bundle of metrics.
//
// - |event| is the uint64 name hash of an event.
// - |metric| is the uint64 name hash of a metric within |event|.
// - |project_name_hash| is the uint64 name hash of an event or project.
// - |metric| is the uint64 name hash of a metric within |project_name_hash|.
// - |value| is the string value to hash.
//
// The result is the HMAC digest of the |value| salted with |metric|, using
// the key for |event|. That is:
// the key for |project_name_hash|. That is:
//
// HMAC_SHA256(key(event), concat(value, hex(metric)))
// HMAC_SHA256(key(project_name_hash), concat(value, hex(metric)))
//
// Returns 0u in case of an error.
uint64_t HashForEventMetric(uint64_t event,
uint64_t HashForEventMetric(uint64_t project_name_hash,
uint64_t metric,
const std::string& value);
// Returns an ID for this (user, |event|) pair. |event| is the name of an
// event, represented by the first 8 bytes of the MD5 hash of its name defined
// in structured.xml.
// Returns an ID for this (user, |project_name_hash|) pair.
// |project_name_hash| is the name of an event or project, represented by the
// first 8 bytes of the MD5 hash of its name defined in structured.xml.
//
// The derived ID is the first 8 bytes of SHA256(key(event)). Returns 0u in
// case of an error.
// The derived ID is the first 8 bytes of SHA256(key(project_name_hash)).
// Returns 0u in case of an error.
//
// This ID is intended as the only ID for a particular structured metrics
// event. However, events are uploaded from the device alongside the UMA
......@@ -86,7 +86,7 @@ class KeyData {
// means events are associated with the client ID when uploaded from the
// device. See the class comment of StructuredMetricsProvider for more
// details.
uint64_t UserEventId(uint64_t event);
uint64_t UserEventId(uint64_t project_name_hash);
private:
int GetRotationPeriod(uint64_t event);
......@@ -97,7 +97,7 @@ class KeyData {
// Ensure that a valid key exists for |event|, and return it. Either returns a
// string of size |kKeySize| or base::nullopt, which indicates an error.
base::Optional<std::string> ValidateAndGetKey(uint64_t event);
base::Optional<std::string> ValidateAndGetKey(uint64_t project_name_hash);
void SetKey(uint64_t event, const std::string& key);
// Ensure that valid keys exist for all events.
......
......@@ -45,6 +45,8 @@ constexpr char kKey[] = "abcdefghijklmnopqrstuvwxyzabcdef";
constexpr uint64_t kEventOneHash = UINT64_C(15619026293081468407);
// The name hash of "TestEventTwo".
constexpr uint64_t kEventTwoHash = UINT64_C(15791833939776536363);
// The name hash of "TestProject".
constexpr uint64_t kProjectHash = UINT64_C(17426425568333718899);
// The name hash of "TestMetricOne".
constexpr uint64_t kMetricOneHash = UINT64_C(637929385654885975);
......@@ -83,7 +85,7 @@ std::string RotationPeriodPath(const uint64_t event) {
// Returns the total number of events registered in structured.xml. This is used
// to determine how many keys we expect to load or rotate on initialization.
int NumberOfEvents() {
return sizeof(metrics::structured::events::kEventNameHashes) /
return sizeof(metrics::structured::events::kProjectNameHashes) /
sizeof(uint64_t);
}
......@@ -186,7 +188,7 @@ TEST_F(KeyDataTest, GeneratesKeysForEvents) {
NumberOfEvents());
const std::string key_one = GetString(KeyPath(kEventOneHash));
const std::string key_two = GetString(KeyPath(kEventTwoHash));
const std::string key_two = GetString(KeyPath(kProjectHash));
EXPECT_EQ(key_one.size(), 32ul);
EXPECT_EQ(key_two.size(), 32ul);
......
......@@ -77,9 +77,9 @@ void StructuredMetricsProvider::OnRecord(const EventBase& event) {
// Store hashed values as strings, because the JSON parser only retains 53
// bits of precision for ints. This would corrupt the hashes.
name_value.SetStringKey(
"value",
base::NumberToString(key_data_->HashForEventMetric(
event.name_hash(), metric.name_hash, metric.string_value)));
"value", base::NumberToString(key_data_->HashForEventMetric(
event.project_name_hash(), metric.name_hash,
metric.string_value)));
} else if (metric.type == EventBase::MetricType::kInt) {
name_value.SetIntKey("value", metric.int_value);
}
......@@ -87,9 +87,13 @@ void StructuredMetricsProvider::OnRecord(const EventBase& event) {
metrics.Append(std::move(name_value));
}
// Create an event value containing the metrics and the event name hash.
// Create an event value containing the metrics, the event name hash, and the
// ID that will eventually be used as the profile_event_id of this event.
base::Value event_value(base::Value::Type::DICTIONARY);
event_value.SetStringKey("name", base::NumberToString(event.name_hash()));
event_value.SetStringKey(
"id",
base::NumberToString(key_data_->UserEventId(event.project_name_hash())));
event_value.SetKey("metrics", std::move(metrics));
// Add the event to |storage_|.
......@@ -178,7 +182,14 @@ void StructuredMetricsProvider::ProvideCurrentSessionData(
continue;
}
event_proto->set_event_name_hash(event_name_hash);
event_proto->set_profile_event_id(key_data_->UserEventId(event_name_hash));
uint64_t user_event_id;
if (!base::StringToUint64(event.FindKey("id")->GetString(),
&user_event_id)) {
LogInternalError(StructuredMetricsError::kFailedUintConversion);
continue;
}
event_proto->set_profile_event_id(user_event_id);
for (const auto& metric : event.FindKey("metrics")->GetList()) {
auto* metric_proto = event_proto->add_metrics();
......
......@@ -35,7 +35,8 @@ namespace {
// To test that the right values are calculated for hashed metrics, we need to
// set up some fake keys that we know the output hashes for. kKeyData contains
// the JSON for a simple structured_metrics.json file with keys for the test
// events.
// events. The two keys are ID'd by the name hashes of "TestEventOne" and
// "TestProject", because TestEventTwo is associated with TestProject.
// TODO(crbug.com/1016655): Once custom rotation periods have been implemented,
// change the large constants to 0.
constexpr char kKeyData[] = R"({
......@@ -45,7 +46,7 @@ constexpr char kKeyData[] = R"({
"last_rotation":1000000,
"key":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
},
"15791833939776536363":{
"17426425568333718899":{
"rotation_period":1000000,
"last_rotation":1000000,
"key":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
......@@ -276,7 +277,7 @@ TEST_F(StructuredMetricsProviderTest, EventsReportedCorrectly) {
const auto& metric = event.metrics(0);
EXPECT_EQ(metric.name_hash(), kMetricThreeHash);
EXPECT_EQ(HashToHex(metric.value_hmac()),
// Value of HMAC_256("bbb...b", concat(hex(kMetricOneHash),
// Value of HMAC_256("bbb...b", concat(hex(kProjectHash),
// "value three"))
"86F0169868588DC7");
}
......
......@@ -34,16 +34,16 @@ class EventInfo(object):
def __init__(self, event_obj, project_obj):
self.raw_name = event_obj['name']
self.name = sanitize_name(event_obj['name'])
self.hash = HashName(event_obj['name'])
self.name_hash = HashName(event_obj['name'])
# If a project is associated with this event, project_obj will be non-None
# and we should use the project's name as the key name hash. Otherwise, use
# the event's name as the key name hash.
if project_obj:
key_name = project_obj['name']
project_name = sanitize_name(project_obj['name'])
else:
key_name = event_obj['name']
self.key_name_hash = HashName(key_name)
project_name = sanitize_name(event_obj['name'])
self.project_name_hash = HashName(project_name)
class MetricInfo(object):
......@@ -95,25 +95,29 @@ class Template(object):
file_info = FileInfo(relpath, self.basename)
event_code = []
projects = {
project_name_hashes = set()
defined_projects = {
project['name']: project
for project in data[_PROJECTS_TYPE.tag][_PROJECT_TYPE.tag]
}
for event in data[_EVENTS_TYPE.tag][_EVENT_TYPE.tag]:
project = projects.get(event.get('project'))
event_code.append(self._StampEventCode(file_info, event, project))
defined_project = defined_projects.get(event.get('project'))
event_code.append(self._StampEventCode(file_info, event, defined_project))
project_name_hashes.add(
defined_project['name'] if defined_project else event['name'])
event_code = ''.join(event_code)
event_name_hashes = [
'UINT64_C(%s)' % HashName(metric['name'])
for metric in data[_EVENTS_TYPE.tag][_EVENT_TYPE.tag]
project_name_hashes = [
'UINT64_C(%s)' % HashName(name)
for name in sorted(list(project_name_hashes))
]
event_name_hashes = '{' + ', '.join(event_name_hashes) + '}'
project_name_hashes = '{' + ', '.join(project_name_hashes) + '}'
return self.file_template.format(
file=file_info,
event_code=event_code,
event_name_hashes=event_name_hashes)
project_name_hashes=project_name_hashes)
def WriteFile(self, outdir, relpath, data):
"""Generates code and writes it to a file.
......
......@@ -26,7 +26,7 @@ namespace metrics {{
namespace structured {{
namespace events {{
constexpr uint64_t kEventNameHashes[] = {event_name_hashes};\
constexpr uint64_t kProjectNameHashes[] = {project_name_hashes};\
{event_code}
}} // namespace events
......@@ -43,8 +43,8 @@ class {event.name} final : public ::metrics::structured::EventBase {{
{event.name}();
~{event.name}() override;
static constexpr uint64_t kEventNameHash = UINT64_C({event.hash});
static constexpr uint64_t kKeyNameHash = UINT64_C({event.key_name_hash});\
static constexpr uint64_t kEventNameHash = UINT64_C({event.name_hash});
static constexpr uint64_t kProjectNameHash = UINT64_C({event.project_name_hash});\
{metric_code}
}};\
"""
......@@ -84,7 +84,7 @@ namespace events {{\
IMPL_EVENT_TEMPLATE = """
{event.name}::{event.name}() :
::metrics::structured::EventBase(kEventNameHash) {{}}
::metrics::structured::EventBase(kEventNameHash, kProjectNameHash) {{}}
{event.name}::~{event.name}() = default;\
{metric_code}\
"""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment