Commit d35f5dac authored by Russ Hamilton's avatar Russ Hamilton Committed by Commit Bot

Modify Privacy Budget UKM Entry Filter to measure identifiable surfaces.

Add a "SurfaceTracker" to IdentifiabilityStudyState to keep track of
surfaces that have been previously reported for each source. This is
used for deduplication and to limit the number of metrics sent in each
UKM report.

Bug: 1112787
Change-Id: I00eb2360849ea6772fbfdcc9b1cf91680f8c6173
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2363398Reviewed-by: default avatarAsanka Herath <asanka@chromium.org>
Commit-Queue: Russ Hamilton <behamilton@google.com>
Cr-Commit-Position: refs/heads/master@{#800332}
parent 1a876de2
......@@ -7,6 +7,7 @@ source_set("headers") {
"identifiability_study_state.h",
"privacy_budget_prefs.h",
"privacy_budget_ukm_entry_filter.h",
"sampled_surface_tracker.h",
]
deps = [
......@@ -23,6 +24,7 @@ source_set("privacy_budget") {
"identifiability_study_state.cc",
"privacy_budget_prefs.cc",
"privacy_budget_ukm_entry_filter.cc",
"sampled_surface_tracker.cc",
]
public_deps = [ ":headers" ]
......@@ -45,6 +47,7 @@ source_set("unit_tests") {
sources = [
"identifiability_study_state_unittest.cc",
"privacy_budget_ukm_entry_filter_unittest.cc",
"sampled_surface_tracker_unittest.cc",
]
deps = [
......
......@@ -309,3 +309,13 @@ void IdentifiabilityStudyState::ReconcileLoadedPrefs() {
if (modified)
WriteToPrefs();
}
bool IdentifiabilityStudyState::ShouldRecordSurface(
uint64_t source_id,
blink::IdentifiableSurface surface) {
return tracked_surfaces_.ShouldRecord(source_id, surface.ToUkmMetricHash());
}
void IdentifiabilityStudyState::ResetRecordedSurfaces() {
tracked_surfaces_.Reset();
}
......@@ -10,6 +10,7 @@
#include "base/strings/string_piece_forward.h"
#include "base/thread_annotations.h"
#include "chrome/browser/privacy_budget/privacy_budget_prefs.h"
#include "chrome/browser/privacy_budget/sampled_surface_tracker.h"
#include "chrome/common/privacy_budget/privacy_budget_settings_provider.h"
#include "components/prefs/pref_service.h"
#include "third_party/blink/public/common/privacy_budget/identifiability_study_settings.h"
......@@ -30,6 +31,12 @@ class InspectableIdentifiabilityStudyState;
// surface is blocked by a settings change. These are kept around in order to
// minimize the total number of surfaces that we record per client.
//
// * Which identifiable surfaces are "tracked". I.e. we record whether a site
// measured this surface. This restriction is primarily to limit the amount
// of data that gets sent to UKM. Note that this is independent of what
// surfaces are considered "active". This set is reset periodically to ensure
// we get a variety of different measurements.
//
// * The PRNG seed that we use for various pseudo-random operations.
//
// * The study generation.
......@@ -63,6 +70,15 @@ class IdentifiabilityStudyState {
// instances are to be constructed.
static void ResetStateForTesting();
// Returns true if tracking metrics should be recorded for this
// source_id/surface combination.
bool ShouldRecordSurface(uint64_t source_id,
blink::IdentifiableSurface surface);
// Clear the sampled surface state from the state tracker. Ideally this would
// be called each time a UKM report generated.
void ResetRecordedSurfaces();
// A knob that we can use to split data sets from different versions of the
// implementation where the differences could have material effects on the
// data distribution.
......@@ -202,6 +218,19 @@ class IdentifiabilityStudyState {
// * max_active_surfaces_ ≤ kIdentifiabilityStudyMaxSurfaces.
const size_t max_active_surfaces_;
// Set of identifiable surfaces for which we record when the site makes
// measurement surfaces. This set is
// updated as we go unless it is already saturated, and resets itself
// periodically.
//
// Invariants:
//
// * tracked_surfaces_ ∩ settings_.blocked_surfaces() = Ø.
//
// * tracked_surfaces_.GetType() ∩ settings_.blocked_types() = Ø.
//
SampledSurfaceTracker tracked_surfaces_;
SEQUENCE_CHECKER(sequence_checker_);
};
......
......@@ -8,10 +8,14 @@
#include <memory>
#include "base/containers/flat_set.h"
#include "base/rand_util.h"
#include "base/stl_util.h"
#include "base/time/time.h"
#include "chrome/browser/privacy_budget/sampled_surface_tracker.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/mojom/ukm_interface.mojom.h"
#include "third_party/blink/public/common/privacy_budget/identifiability_study_settings.h"
#include "third_party/blink/public/common/privacy_budget/identifiable_surface.h"
PrivacyBudgetUkmEntryFilter::PrivacyBudgetUkmEntryFilter(
IdentifiabilityStudyState* state)
......@@ -31,13 +35,40 @@ bool PrivacyBudgetUkmEntryFilter::FilterEntry(
if (!enabled || entry->metrics.empty())
return false;
std::vector<blink::IdentifiableSurface> sampled_surfaces;
sampled_surfaces.reserve(entry->metrics.size());
base::EraseIf(entry->metrics, [&](auto metric) {
return !identifiability_study_state_->ShouldSampleSurface(
blink::IdentifiableSurface::FromMetricHash(metric.first));
const auto surface =
blink::IdentifiableSurface::FromMetricHash(metric.first);
// Exclude the set that are blocked from all measurements.
if (!blink::IdentifiabilityStudySettings::Get()->IsSurfaceAllowed(surface))
return true;
// Record the set of surfaces sampled by the site.
if (identifiability_study_state_->ShouldRecordSurface(entry->source_id,
surface))
sampled_surfaces.push_back(surface);
// Exclude the set that are disabled for this user
return !identifiability_study_state_->ShouldSampleSurface(surface);
});
uint64_t sample_idx = 0;
for (const auto& v : sampled_surfaces) {
// Add entries marking the surfaces that were sampled by the source as
// sampled.
blink::IdentifiableSurface s = blink::IdentifiableSurface::FromTypeAndInput(
blink::IdentifiableSurface::Type::kMeasuredSurface, sample_idx++);
entry->metrics.insert_or_assign(entry->metrics.end(), s.ToUkmMetricHash(),
v.ToUkmMetricHash());
}
// Identifiability metrics can leak information simply by being measured.
// Hence the metrics that are filtered out aren't returning in
// |removed_metric_hashes|.
return !entry->metrics.empty();
}
void PrivacyBudgetUkmEntryFilter::OnStoreRecordingsInReport() const {
identifiability_study_state_->ResetRecordedSurfaces();
}
......@@ -27,6 +27,7 @@ class PrivacyBudgetUkmEntryFilter : public ukm::UkmEntryFilter {
bool FilterEntry(
ukm::mojom::UkmEntry* entry,
base::flat_set<uint64_t>* removed_metric_hashes) const override;
void OnStoreRecordingsInReport() const override;
private:
IdentifiabilityStudyState* const identifiability_study_state_;
......
......@@ -14,6 +14,7 @@
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/mojom/ukm_interface.mojom.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/common/privacy_budget/identifiable_surface.h"
namespace {
......@@ -91,6 +92,12 @@ TEST(PrivacyBudgetUkmEntryFilterStandaloneTest, BlockListedMetrics) {
base::flat_map<uint64_t, int64_t> metrics = {{kBlockedSurface, 1},
{kUnblockedSurface, 2}};
base::flat_map<uint64_t, int64_t> expected_metrics = {
{kUnblockedSurface, 2},
{blink::IdentifiableSurface::FromTypeAndInput(
blink::IdentifiableSurface::Type::kMeasuredSurface, 0)
.ToUkmMetricHash(),
static_cast<int64_t>(kUnblockedSurface)}};
ukm::mojom::UkmEntryPtr x(base::in_place, 1,
ukm::builders::Identifiability::kEntryNameHash,
metrics);
......@@ -99,6 +106,5 @@ TEST(PrivacyBudgetUkmEntryFilterStandaloneTest, BlockListedMetrics) {
base::flat_set<uint64_t> filtered;
EXPECT_TRUE(filter->FilterEntry(x.get(), &filtered));
EXPECT_TRUE(filtered.empty());
ASSERT_EQ(1u, x->metrics.size());
EXPECT_EQ(kUnblockedSurface, x->metrics.begin()->first);
EXPECT_EQ(expected_metrics, x->metrics);
}
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/privacy_budget/sampled_surface_tracker.h"
#include <vector>
#include "base/rand_util.h"
#include "base/time/time.h"
#include "chrome/browser/privacy_budget/identifiability_study_state.h"
#include "third_party/blink/public/common/privacy_budget/identifiable_surface.h"
namespace {
uint64_t hash(uint64_t x) {
// This is should be a reasonable and fast reversible hash.
// Based a bit on https://naml.us/post/inverse-of-a-hash-function/
x = x * 88388617; // calculate as (x<<23)+(x<<3)+x
x = x ^ (x >> 31);
x = x * 257; // calculate as (x<<8)+x
x = x ^ (x >> 13);
x = x * 769; // calculate as (x<<9)+(x<<8)+x
x = x ^ (x >> 23);
x = x * 127; // calculate as (x<<7)-x
return x;
}
} // namespace
const unsigned SampledSurfaceTracker::kMaxTrackedSurfaces;
SampledSurfaceTracker::SampledSurfaceTracker() = default;
SampledSurfaceTracker::~SampledSurfaceTracker() = default;
void SampledSurfaceTracker::Reset() {
seed_ = base::RandUint64();
surfaces_.clear();
}
bool SampledSurfaceTracker::ShouldRecord(uint64_t source_id, uint64_t surface) {
if (blink::IdentifiableSurface::FromMetricHash(surface).GetType() ==
blink::IdentifiableSurface::Type::kReservedInternal)
return false;
HashKey key = hash(surface ^ seed_);
auto it = surfaces_.find(key);
if (it == surfaces_.end()) {
if (surfaces_.size() >= kMaxTrackedSurfaces &&
key <= surfaces_.begin()->first) {
return false;
}
// We need to add an entry for this surface, possibly bumping an entry out.
surfaces_.insert(std::make_pair(key, base::flat_set<uint64_t>{source_id}));
if (surfaces_.size() > kMaxTrackedSurfaces) {
// Remove the smallest
surfaces_.erase(surfaces_.begin());
}
return true;
}
if (it->second.contains(source_id))
return false;
it->second.insert(source_id);
return true;
}
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_PRIVACY_BUDGET_SAMPLED_SURFACE_TRACKER_H_
#define CHROME_BROWSER_PRIVACY_BUDGET_SAMPLED_SURFACE_TRACKER_H_
#include <map>
#include "base/containers/flat_set.h"
#include "base/time/time.h"
class SampledSurfaceTracker {
public:
// Maximum number of surfaces that this class can track. Prevents unbounded
// memory growth.
static constexpr unsigned kMaxTrackedSurfaces = 1000;
SampledSurfaceTracker();
~SampledSurfaceTracker();
bool ShouldRecord(uint64_t source_id, uint64_t surface);
void Reset();
private:
using HashKey = uint64_t;
// We use std::map since it makes it fast to remove the minimum.
std::map<HashKey, base::flat_set<uint64_t>> surfaces_;
uint64_t seed_;
};
#endif // CHROME_BROWSER_PRIVACY_BUDGET_SAMPLED_SURFACE_TRACKER_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/privacy_budget/sampled_surface_tracker.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/common/privacy_budget/identifiable_surface.h"
namespace {
uint64_t metric(int i) {
return blink::IdentifiableSurface::FromTypeAndInput(
blink::IdentifiableSurface::Type::kWebFeature, i)
.ToUkmMetricHash();
}
} // namespace
TEST(SampledSurfaceTrackerTest, Dedup) {
SampledSurfaceTracker t;
EXPECT_TRUE(t.ShouldRecord(0, metric(1)));
EXPECT_FALSE(t.ShouldRecord(0, metric(1)));
EXPECT_TRUE(t.ShouldRecord(1, metric(1)));
}
TEST(SampledSurfaceTrackerTest, SizeLimit) {
SampledSurfaceTracker t;
for (uint64_t i = 0; i < SampledSurfaceTracker::kMaxTrackedSurfaces; i++) {
EXPECT_TRUE(t.ShouldRecord(0, metric(i))) << ": 0," << i;
}
// Now the tracker should be full, but we will still allow new sources.
for (uint64_t i = 0; i < SampledSurfaceTracker::kMaxTrackedSurfaces; i++) {
EXPECT_FALSE(t.ShouldRecord(0, metric(i))) << ": 0," << i;
EXPECT_TRUE(t.ShouldRecord(1, metric(i))) << ": 1," << i;
}
// Add an extra one. This should bump one of the surfaces out.
t.ShouldRecord(0, SampledSurfaceTracker::kMaxTrackedSurfaces + 1);
// We expect only kMaxTrackedSurfaces to return true for a new surface.
unsigned num_true = 0;
for (uint64_t i = 0; i < SampledSurfaceTracker::kMaxTrackedSurfaces + 1;
i++) {
if (t.ShouldRecord(2, metric(i)))
num_true++;
}
EXPECT_EQ(SampledSurfaceTracker::kMaxTrackedSurfaces, num_true);
}
TEST(SampledSurfaceTrackerTest, Reset) {
SampledSurfaceTracker t;
EXPECT_TRUE(t.ShouldRecord(0, metric(0))) << ": 0,0";
EXPECT_FALSE(t.ShouldRecord(0, metric(0))) << ": 0,0";
t.Reset();
EXPECT_TRUE(t.ShouldRecord(0, metric(0))) << ": 0,0";
}
TEST(SampledSurfaceTrackerTest, InvalidMetric) {
SampledSurfaceTracker t;
EXPECT_FALSE(t.ShouldRecord(
0, blink::IdentifiableSurface::FromTypeAndInput(
blink::IdentifiableSurface::Type::kReservedInternal, 1)
.ToUkmMetricHash()));
}
......@@ -97,6 +97,10 @@ class IdentifiableSurface {
// Extension running content-script.
kExtensionContentScript = 6,
// Represents making a measurement of one of the above surfacess. This
// metric is retained even if filtering discards the surface.
kMeasuredSurface = 7,
// We can use values up to and including |kMax|.
kMax = (1 << kTypeBits) - 1
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment