Commit 50bb6106 authored by Brian White's avatar Brian White Committed by Commit Bot

Support 'sampling groups' for consistency between UKM events.

This allows a metric to declare its sampling to exactly match another,
not just in frequency but also which pages are sampled in/out.  This
will allow sampling for interdependent metrics which otherwise have
to be unsampled in order to be accurately compared.

It is activated by having a Finch parameter string give the name of
the control metric instead of an integer that is the sampling rate.

Some functionality was broken out into other functions for easier
testing.

Bug: 766909
Change-Id: I1d5f3a248722e44d4d5790f9581dc6d8b1eaa0e8
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2151107Reviewed-by: default avatarAlexei Svitkine <asvitkine@chromium.org>
Commit-Queue: Brian White <bcwhite@chromium.org>
Cr-Commit-Position: refs/heads/master@{#763592}
parent 54892d0a
......@@ -665,12 +665,7 @@ void UkmRecorderImpl::AddEntry(mojom::UkmEntryPtr entry) {
LoadExperimentSamplingInfo();
}
auto found = event_sampling_rates_.find(entry->event_hash);
int sampling_rate = (found != event_sampling_rates_.end())
? found->second
: default_sampling_rate_;
bool sampled_in =
IsSampledIn(entry->source_id, entry->event_hash, sampling_rate);
bool sampled_in = IsSampledIn(entry->source_id, entry->event_hash);
if (!sampled_in) {
RecordDroppedEntry(DroppedDataReason::SAMPLED_OUT);
......@@ -707,30 +702,63 @@ void UkmRecorderImpl::LoadExperimentSamplingInfo() {
// Check the parameters for sampling controls.
std::map<std::string, std::string> params;
if (base::GetFieldTrialParamsByFeature(kUkmSamplingRateFeature, &params)) {
for (const auto& kv : params) {
const std::string& key = kv.first;
if (key.length() == 0)
continue;
LoadExperimentSamplingParams(params);
}
}
// Keys starting with an underscore are global configuration.
if (key.at(0) == '_') {
if (key == "_default_sampling") {
int sampling;
// We only load non-negative global sampling rates.
if (base::StringToInt(kv.second, &sampling) && sampling >= 0)
default_sampling_rate_ = sampling;
}
continue;
void UkmRecorderImpl::LoadExperimentSamplingParams(
const std::map<std::string, std::string>& params) {
for (const auto& kv : params) {
const std::string& key = kv.first;
if (key.length() == 0)
continue;
// Keys starting with an underscore are global configuration.
if (key.at(0) == '_') {
if (key == "_default_sampling") {
int sampling;
// We only load non-negative global sampling rates.
if (base::StringToInt(kv.second, &sampling) && sampling >= 0)
default_sampling_rate_ = sampling;
}
continue;
}
// Anything else is an event name.
int sampling;
if (base::StringToInt(kv.second, &sampling) && sampling >= 0)
event_sampling_rates_[base::HashMetricName(key)] = sampling;
// Anything else is an event name.
int sampling;
auto hash = base::HashMetricName(key);
if (base::StringToInt(kv.second, &sampling)) {
// If the parameter is a number then that's the sampling rate.
if (sampling >= 0)
event_sampling_rates_[hash] = sampling;
} else {
// If the parameter is a string then it's the name of another metric
// to which it should be slaved. This allows different metrics to be
// sampled in or out together.
event_sampling_master_[hash] = base::HashMetricName(kv.second);
}
}
}
bool UkmRecorderImpl::IsSampledIn(int64_t source_id, uint64_t event_id) {
// Determine the sampling rate. It's one of:
// - the default
// - an explicit sampling rate
// - a group sampling rate
int sampling_rate = default_sampling_rate_;
uint64_t sampling_hash = event_id;
auto master_found = event_sampling_master_.find(sampling_hash);
if (master_found != event_sampling_master_.end()) {
sampling_hash = master_found->second;
}
auto rate_found = event_sampling_rates_.find(sampling_hash);
if (rate_found != event_sampling_rates_.end()) {
sampling_rate = rate_found->second;
}
return IsSampledIn(source_id, sampling_hash, sampling_rate);
}
bool UkmRecorderImpl::IsSampledIn(int64_t source_id,
uint64_t event_id,
int sampling_rate) {
......
......@@ -90,9 +90,12 @@ class UkmRecorderImpl : public UkmRecorder {
}
protected:
// Calculates sampled in/out for a specific source/event based on a given
// |sampling_rate|. This function is guaranteed to always return the same
// result over the life of this object for the same input parameters.
// Calculates sampled in/out for a specific source/event based on internal
// configuration. This function is guaranteed to always return the same
// result over the life of this object for the same config & input parameters.
bool IsSampledIn(int64_t source_id, uint64_t event_id);
// Like above but uses a passed |sampling_rate| instead of internal config.
bool IsSampledIn(int64_t source_id, uint64_t event_id, int sampling_rate);
// Cache the list of whitelisted entries from the field trial parameter.
......@@ -167,9 +170,14 @@ class UkmRecorderImpl : public UkmRecorder {
void RecordSource(std::unique_ptr<UkmSource> source);
// Load sampling configurations from field-trial information.
// Loads sampling configurations from field-trial information.
void LoadExperimentSamplingInfo();
// Loads sampling configuration from the key/value "params" of a field-trial.
// This is separated from the above to ease testing.
void LoadExperimentSamplingParams(
const std::map<std::string, std::string>& params);
// Whether recording new data is currently allowed.
bool recording_enabled_ = false;
......@@ -200,6 +208,10 @@ class UkmRecorderImpl : public UkmRecorder {
int default_sampling_rate_ = -1; // -1 == not yet loaded
base::flat_map<uint64_t, int> event_sampling_rates_;
// If an event's sampling is "slaved" to another, the hashes of the slave
// and the master are recorded here.
base::flat_map<uint64_t, uint64_t> event_sampling_master_;
// Contains data from various recordings which periodically get serialized
// and cleared by StoreRecordingsInReport() and may be Purged().
struct Recordings {
......
......@@ -5,6 +5,7 @@
#include "components/ukm/ukm_recorder_impl.h"
#include "base/bind.h"
#include "base/metrics/metrics_hashes.h"
#include "base/metrics/ukm_source_id.h"
#include "base/test/scoped_feature_list.h"
#include "base/test/task_environment.h"
......@@ -59,6 +60,34 @@ TEST(UkmRecorderImplTest, IsSampledIn) {
EXPECT_TRUE(impl.IsSampledIn(3, 2, 2));
EXPECT_TRUE(impl.IsSampledIn(4, 1, 2));
EXPECT_FALSE(impl.IsSampledIn(4, 2, 2));
// Load a configuration for more detailed testing.
std::map<std::string, std::string> params = {
{"y.a", "3"},
{"y.b", "y.a"},
{"y.c", "y.a"},
};
impl.LoadExperimentSamplingParams(params);
EXPECT_LT(impl.default_sampling_rate_, 0);
// Functions under test take hashes instead of strings.
uint64_t hash_ya = base::HashMetricName("y.a");
uint64_t hash_yb = base::HashMetricName("y.b");
uint64_t hash_yc = base::HashMetricName("y.c");
// Check that the parameters are active.
EXPECT_TRUE(impl.IsSampledIn(11, hash_ya));
EXPECT_TRUE(impl.IsSampledIn(22, hash_ya));
EXPECT_FALSE(impl.IsSampledIn(33, hash_ya));
EXPECT_FALSE(impl.IsSampledIn(44, hash_ya));
EXPECT_FALSE(impl.IsSampledIn(55, hash_ya));
// Check that sampled in/out is the same for all three.
for (int source = 0; source < 100; ++source) {
bool sampled_in = impl.IsSampledIn(source, hash_ya);
EXPECT_EQ(sampled_in, impl.IsSampledIn(source, hash_yb));
EXPECT_EQ(sampled_in, impl.IsSampledIn(source, hash_yc));
}
}
TEST(UkmRecorderImplTest, PurgeExtensionRecordings) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment