Commit 2d408bea authored by Nate Fischer's avatar Nate Fischer Committed by Commit Bot

AW UMA: increase sample rate for pre-stable channels

This increases the sample rate for pre-stable channels (the stable
channel sample rate is still 2%). This change is not expected to have a
significant impact on total UMA volume, due to the relatively small size
of WebView's pre-stable channels.

This addresses privacy concerns (regarding fingerprint-ability for
low-usage applications) by limiting the package name field to 10% of the
population. When sample rate < 10%, we'll upload package name with all
UMA logs (this is the status quo). When sample rate > 10%, we'll only
upload package name for the clients which fall within a 10% group (using
similar logic to our existing sampling logic).

This continues to maintain the requirement that we only upload the
package name for system apps or Play Store-installed apps (ex. This will
not log the package name for sideloaded apps, even if the client falls
within the 10% package-name-logging group).

Bug: 969803
Test: run_android_webview_unittests --gtest_filter=AwMetricsServiceClientTest.*
Test: build with android_channel="beta", manually observe metrics are logged 99% of the time
Test: build with android_channel="stable", manually observe metrics are logged 2% of the time
Change-Id: I611c76b21f790251ab08de3f72cd0807f9a88f9c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1898711
Commit-Queue: Nate Fischer <ntfschr@chromium.org>
Reviewed-by: default avatarChangwan Ryu <changwan@chromium.org>
Cr-Commit-Position: refs/heads/master@{#712879}
parent 6643278f
......@@ -51,10 +51,18 @@ namespace {
// as a separate client).
const double kStableSampledInRate = 0.02;
// Sample non-stable channels also at 2%. We intend to raise this to 99% in the
// future (for consistency with Chrome and to exercise the out-of-sample code
// path).
const double kBetaDevCanarySampledInRate = 0.02;
// Sample non-stable channels at 99%, to boost volume for pre-stable
// experiments. We choose 99% instead of 100% for consistency with Chrome and to
// exercise the out-of-sample code path.
const double kBetaDevCanarySampledInRate = 0.99;
// As a mitigation to preserve use privacy, the privacy team has asked that we
// upload package name with no more than 10% of UMA records. This is to mitigate
// fingerprinting for users on low-usage applications (if an app only has a
// a small handful of users, there's a very good chance many of them won't be
// uploading UMA records due to sampling). Do not change this constant without
// consulting with the privacy team.
const double kPackageNameLimitRate = 0.10;
// Callbacks for metrics::MetricsStateManager::Create. Store/LoadClientInfo
// allow Windows Chrome to back up ClientInfo. They're no-ops for WebView.
......@@ -66,40 +74,18 @@ std::unique_ptr<metrics::ClientInfo> LoadClientInfo() {
return client_info;
}
// WebView metrics are sampled at (possibly) different rates depending on
// channel, based on the client ID. Sampling is hard-coded (rather than
// controlled via variations, as in Chrome) because:
// - WebView is slow to download the variations seed and propagate it to each
// app, so we'd miss metrics from the first few runs of each app.
// - WebView uses the low-entropy source for all studies, so there would be
// crosstalk between the metrics sampling study and all other studies.
bool IsInSample(const std::string& client_id) {
DCHECK(!client_id.empty());
double sampled_in_rate = kBetaDevCanarySampledInRate;
// Down-sample unknown channel as a precaution in case it ends up being
// shipped to Stable users.
version_info::Channel channel = version_info::android::GetChannel();
if (channel == version_info::Channel::STABLE ||
channel == version_info::Channel::UNKNOWN) {
sampled_in_rate = kStableSampledInRate;
}
// client_id comes from base::GenerateGUID(), so its value is random/uniform,
// except for a few bit positions with fixed values, and some hyphens. Rather
// than separating the random payload from the fixed bits, just hash the whole
// thing, to produce a new random/~uniform value.
uint32_t hash = base::PersistentHash(client_id);
bool UintFallsInBottomPercentOfValues(uint32_t value, double percent) {
DCHECK_GT(percent, 0);
DCHECK_LT(percent, 1.00);
// Since hashing is ~uniform, the chance that the value falls in the bottom
// X% of possible values is X%. UINT32_MAX fits within the range of integers
// that can be expressed precisely by a 64-bit double. Casting back to a
// uint32_t means the effective sample rate is within a 1/UINT32_MAX error
// margin.
uint32_t sampled_in_threshold =
static_cast<uint32_t>(static_cast<double>(UINT32_MAX) * sampled_in_rate);
return hash < sampled_in_threshold;
// uint32_t means we can determine if the value falls within the bottom X%,
// within a 1/UINT32_MAX error margin.
uint32_t value_threshold =
static_cast<uint32_t>(static_cast<double>(UINT32_MAX) * percent);
return value < value_threshold;
}
std::unique_ptr<metrics::MetricsService> CreateMetricsService(
......@@ -315,7 +301,7 @@ bool AwMetricsServiceClient::ShouldStartUpFastForTesting() const {
}
std::string AwMetricsServiceClient::GetAppPackageName() {
if (CanRecordPackageName()) {
if (IsInPackageNameSample() && CanRecordPackageNameForAppType()) {
JNIEnv* env = base::android::AttachCurrentThread();
base::android::ScopedJavaLocalRef<jstring> j_app_name =
Java_AwMetricsServiceClient_getAppPackageName(env);
......@@ -325,16 +311,55 @@ std::string AwMetricsServiceClient::GetAppPackageName() {
return std::string();
}
// WebView metrics are sampled at (possibly) different rates depending on
// channel, based on the client ID. Sampling is hard-coded (rather than
// controlled via variations, as in Chrome) because:
// - WebView is slow to download the variations seed and propagate it to each
// app, so we'd miss metrics from the first few runs of each app.
// - WebView uses the low-entropy source for all studies, so there would be
// crosstalk between the metrics sampling study and all other studies.
double AwMetricsServiceClient::GetSampleRate() {
double sampled_in_rate = kBetaDevCanarySampledInRate;
// Down-sample unknown channel as a precaution in case it ends up being
// shipped to Stable users.
version_info::Channel channel = version_info::android::GetChannel();
if (channel == version_info::Channel::STABLE ||
channel == version_info::Channel::UNKNOWN) {
sampled_in_rate = kStableSampledInRate;
}
return sampled_in_rate;
}
bool AwMetricsServiceClient::IsInSample() {
// Called in MaybeStartMetrics(), after metrics_service_ is created.
return ::android_webview::IsInSample(metrics_service_->GetClientId());
return IsInSample(base::PersistentHash(metrics_service_->GetClientId()));
}
bool AwMetricsServiceClient::IsInSample(uint32_t value) {
return UintFallsInBottomPercentOfValues(value, GetSampleRate());
}
bool AwMetricsServiceClient::CanRecordPackageName() {
bool AwMetricsServiceClient::CanRecordPackageNameForAppType() {
// Check with Java side, to see if it's OK to log the package name for this
// type of app (see Java side for the specific requirements).
JNIEnv* env = base::android::AttachCurrentThread();
return Java_AwMetricsServiceClient_canRecordPackageName(env);
return Java_AwMetricsServiceClient_canRecordPackageNameForAppType(env);
}
bool AwMetricsServiceClient::IsInPackageNameSample() {
// Check if this client falls within the group for which it's acceptable to
// log package name. This guarantees we enforce the privacy requirement
// because we never log package names for more than kPackageNameLimitRate
// percent of clients. We'll actually log package name for less than this,
// because we also filter out packages for certain types of apps (see
// CanRecordPackageNameForAppType()).
return IsInPackageNameSample(
base::PersistentHash(metrics_service_->GetClientId()));
}
bool AwMetricsServiceClient::IsInPackageNameSample(uint32_t value) {
return UintFallsInBottomPercentOfValues(value, kPackageNameLimitRate);
}
// static
......
......@@ -133,8 +133,34 @@ class AwMetricsServiceClient : public metrics::MetricsServiceClient,
std::string GetAppPackageName() override;
protected:
virtual bool IsInSample(); // virtual for testing
virtual bool CanRecordPackageName(); // virtual for testing
// Returns the metrics sampling rate, to be used by IsInSample(). This is a
// double in the non-inclusive range (0.00, 1.00). Virtual for testing.
virtual double GetSampleRate();
// Determines if the client is within the random sample of clients for which
// we log metrics. If this returns false, AwMetricsServiceClient should
// indicate reporting is disabled. Sampling is due to storage/bandwidth
// considerations. Virtual for testing.
virtual bool IsInSample();
// Prefer calling the IsInSample() which takes no arguments. Virtual for
// testing.
virtual bool IsInSample(uint32_t value);
// Determines if the embedder app is the type of app for which we may log the
// package name. If this returns false, GetAppPackageName() must return empty
// string. Virtual for testing.
virtual bool CanRecordPackageNameForAppType();
// Determines if this client falls within the group for which it's acceptable
// to include the embedding app's package name. If this returns false,
// GetAppPackageName() must return the empty string (for
// privacy/fingerprintability reasons). Virtual for testing.
virtual bool IsInPackageNameSample();
// Prefer calling the IsInPackageNameSample() which takes no arguments.
// Virtual for testing.
virtual bool IsInPackageNameSample(uint32_t value);
private:
void MaybeStartMetrics();
......
......@@ -20,13 +20,27 @@
namespace android_webview {
namespace {
// Scales up a uint32_t in the inverse of how UintFallsInBottomPercentOfValues()
// makes its judgment. This is useful so tests can use integers, which itself
// helps to avoid rounding issues.
uint32_t ScaleValue(uint32_t value) {
DCHECK_GE(value, 0u);
DCHECK_LE(value, 100u);
double rate = static_cast<double>(value) / 100;
return UINT32_MAX * rate;
}
// For client ID format, see:
// https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)
const char kTestClientId[] = "01234567-89ab-40cd-80ef-0123456789ab";
class TestClient : public AwMetricsServiceClient {
public:
TestClient() : in_sample_(true), record_package_name_(true) {}
TestClient()
: sampled_in_rate_(1.00),
in_sample_(true),
record_package_name_for_app_type_(true),
in_package_name_sample_(true) {}
~TestClient() override {}
bool IsRecordingActive() {
......@@ -35,16 +49,34 @@ class TestClient : public AwMetricsServiceClient {
return service->recording_active();
return false;
}
void SetSampleRate(double value) { sampled_in_rate_ = value; }
void SetInSample(bool value) { in_sample_ = value; }
void SetRecordPackageName(bool value) { record_package_name_ = value; }
void SetRecordPackageNameForAppType(bool value) {
record_package_name_for_app_type_ = value;
}
void SetInPackageNameSample(bool value) { in_package_name_sample_ = value; }
// Expose the super class implementation for testing.
bool IsInSample(uint32_t value) override {
return AwMetricsServiceClient::IsInSample(value);
}
bool IsInPackageNameSample(uint32_t value) override {
return AwMetricsServiceClient::IsInPackageNameSample(value);
}
protected:
double GetSampleRate() override { return sampled_in_rate_; }
bool IsInSample() override { return in_sample_; }
bool CanRecordPackageName() override { return record_package_name_; }
bool CanRecordPackageNameForAppType() override {
return record_package_name_for_app_type_;
}
bool IsInPackageNameSample() override { return in_package_name_sample_; }
private:
double sampled_in_rate_;
bool in_sample_;
bool record_package_name_;
bool record_package_name_for_app_type_;
bool in_package_name_sample_;
DISALLOW_COPY_AND_ASSIGN(TestClient);
};
......@@ -133,12 +165,24 @@ TEST_F(AwMetricsServiceClientTest, TestSetConsentFalseClearsClientId) {
EXPECT_FALSE(prefs->HasPrefPath(metrics::prefs::kMetricsClientID));
}
TEST_F(AwMetricsServiceClientTest, TestShouldNotUploadPackageName) {
TEST_F(AwMetricsServiceClientTest, TestShouldNotUploadPackageName_AppType) {
auto prefs = CreateTestPrefs();
prefs->SetString(metrics::prefs::kMetricsClientID, kTestClientId);
auto client = CreateAndInitTestClient(prefs.get());
client->SetHaveMetricsConsent(true, true);
client->SetRecordPackageNameForAppType(false);
client->SetInPackageNameSample(true);
std::string package_name = client->GetAppPackageName();
EXPECT_TRUE(package_name.empty());
}
TEST_F(AwMetricsServiceClientTest, TestShouldNotUploadPackageName_SampledOut) {
auto prefs = CreateTestPrefs();
prefs->SetString(metrics::prefs::kMetricsClientID, kTestClientId);
auto client = CreateAndInitTestClient(prefs.get());
client->SetHaveMetricsConsent(true, true);
client->SetRecordPackageName(false);
client->SetRecordPackageNameForAppType(true);
client->SetInPackageNameSample(false);
std::string package_name = client->GetAppPackageName();
EXPECT_TRUE(package_name.empty());
}
......@@ -148,11 +192,68 @@ TEST_F(AwMetricsServiceClientTest, TestCanUploadPackageName) {
prefs->SetString(metrics::prefs::kMetricsClientID, kTestClientId);
auto client = CreateAndInitTestClient(prefs.get());
client->SetHaveMetricsConsent(true, true);
client->SetRecordPackageName(true);
client->SetRecordPackageNameForAppType(true);
client->SetInPackageNameSample(true);
std::string package_name = client->GetAppPackageName();
EXPECT_FALSE(package_name.empty());
}
TEST_F(AwMetricsServiceClientTest, TestPackageNameLogic_SampleRateBelowTen) {
auto prefs = CreateTestPrefs();
prefs->SetString(metrics::prefs::kMetricsClientID, kTestClientId);
auto client = CreateAndInitTestClient(prefs.get());
double sample_rate = 0.08;
client->SetSampleRate(sample_rate);
// When GetSampleRate() <= 0.10, everything in-sample should also be in the
// package name sample.
for (uint32_t value = 0; value < 8; ++value) {
EXPECT_TRUE(client->IsInSample(ScaleValue(value)))
<< "Value " << value << " should be in-sample";
EXPECT_TRUE(client->IsInPackageNameSample(ScaleValue(value)))
<< "Value " << value << " should be in the package name sample";
}
// After this, the only thing we care about is that we're out of sample (the
// package name logic shouldn't matter at this point, because we won't upload
// any records).
for (uint32_t value = 8; value < 100; ++value) {
EXPECT_FALSE(client->IsInSample(ScaleValue(value)))
<< "Value " << value << " should be out of sample";
}
}
TEST_F(AwMetricsServiceClientTest, TestPackageNameLogic_SampleRateAboveTen) {
auto prefs = CreateTestPrefs();
prefs->SetString(metrics::prefs::kMetricsClientID, kTestClientId);
auto client = CreateAndInitTestClient(prefs.get());
double sample_rate = 0.90;
client->SetSampleRate(sample_rate);
// When GetSampleRate() > 0.10, only values up to 0.10 should be in the
// package name sample.
for (uint32_t value = 0; value < 10; ++value) {
EXPECT_TRUE(client->IsInSample(ScaleValue(value)))
<< "Value " << value << " should be in-sample";
EXPECT_TRUE(client->IsInPackageNameSample(ScaleValue(value)))
<< "Value " << value << " should be in the package name sample";
}
// After this (but until we hit the sample rate), clients should be in sample
// but not upload the package name.
for (uint32_t value = 10; value < 90; ++value) {
EXPECT_TRUE(client->IsInSample(ScaleValue(value)))
<< "Value " << value << " should be in-sample";
EXPECT_FALSE(client->IsInPackageNameSample(ScaleValue(value)))
<< "Value " << value << " should be out of the package name sample";
}
// After this, the only thing we care about is that we're out of sample (the
// package name logic shouldn't matter at this point, because we won't upload
// any records).
for (uint32_t value = 90; value < 100; ++value) {
EXPECT_FALSE(client->IsInSample(ScaleValue(value)))
<< "Value " << value << " should be out of sample";
}
}
TEST_F(AwMetricsServiceClientTest, TestCanForceEnableMetrics) {
base::CommandLine::ForCurrentProcess()->AppendSwitch(
metrics::switches::kForceEnableMetricsReporting);
......
......@@ -49,7 +49,7 @@ public class AwMetricsServiceClient {
}
@CalledByNative
private static boolean canRecordPackageName() {
private static boolean canRecordPackageNameForAppType() {
// Only record if it's a system app or it was installed from Play Store.
Context ctx = ContextUtils.getApplicationContext();
String packageName = ctx.getPackageName();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment