Commit d35cd337 authored by Maggie Cai's avatar Maggie Cai Committed by Commit Bot

Revert "Modify Privacy Budget UKM Entry Filter to measure identifiable surfaces."

This reverts commit d35f5dac.

Reason for revert: This CL is the possible cause of the breakage of build Linux ChromiumOS MSan Tests since https://ci.chromium.org/p/chromium/builders/ci/Linux%20ChromiumOS%20MSan%20Tests/20302

Original change's description:
> Modify Privacy Budget UKM Entry Filter to measure identifiable surfaces.
> 
> Add a "SurfaceTracker" to IdentifiabilityStudyState to keep track of
> surfaces that have been previously reported for each source. This is
> used for deduplication and to limit the number of metrics sent in each
> UKM report.
> 
> Bug: 1112787
> Change-Id: I00eb2360849ea6772fbfdcc9b1cf91680f8c6173
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2363398
> Reviewed-by: Asanka Herath <asanka@chromium.org>
> Commit-Queue: Russ Hamilton <behamilton@google.com>
> Cr-Commit-Position: refs/heads/master@{#800332}

TBR=asanka@chromium.org,behamilton@google.com

Change-Id: I0a349740361db768696be0703020b229d8e274be
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: 1112787
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2367590Reviewed-by: default avatarMaggie Cai <mxcai@chromium.org>
Commit-Queue: Maggie Cai <mxcai@chromium.org>
Cr-Commit-Position: refs/heads/master@{#800492}
parent 0bc9956e
......@@ -7,7 +7,6 @@ source_set("headers") {
"identifiability_study_state.h",
"privacy_budget_prefs.h",
"privacy_budget_ukm_entry_filter.h",
"sampled_surface_tracker.h",
]
deps = [
......@@ -24,7 +23,6 @@ source_set("privacy_budget") {
"identifiability_study_state.cc",
"privacy_budget_prefs.cc",
"privacy_budget_ukm_entry_filter.cc",
"sampled_surface_tracker.cc",
]
public_deps = [ ":headers" ]
......@@ -47,7 +45,6 @@ source_set("unit_tests") {
sources = [
"identifiability_study_state_unittest.cc",
"privacy_budget_ukm_entry_filter_unittest.cc",
"sampled_surface_tracker_unittest.cc",
]
deps = [
......
......@@ -309,13 +309,3 @@ void IdentifiabilityStudyState::ReconcileLoadedPrefs() {
if (modified)
WriteToPrefs();
}
bool IdentifiabilityStudyState::ShouldRecordSurface(
uint64_t source_id,
blink::IdentifiableSurface surface) {
return tracked_surfaces_.ShouldRecord(source_id, surface.ToUkmMetricHash());
}
void IdentifiabilityStudyState::ResetRecordedSurfaces() {
tracked_surfaces_.Reset();
}
......@@ -10,7 +10,6 @@
#include "base/strings/string_piece_forward.h"
#include "base/thread_annotations.h"
#include "chrome/browser/privacy_budget/privacy_budget_prefs.h"
#include "chrome/browser/privacy_budget/sampled_surface_tracker.h"
#include "chrome/common/privacy_budget/privacy_budget_settings_provider.h"
#include "components/prefs/pref_service.h"
#include "third_party/blink/public/common/privacy_budget/identifiability_study_settings.h"
......@@ -31,12 +30,6 @@ class InspectableIdentifiabilityStudyState;
// surface is blocked by a settings change. These are kept around in order to
// minimize the total number of surfaces that we record per client.
//
// * Which identifiable surfaces are "tracked". I.e. we record whether a site
// measured this surface. This restriction is primarily to limit the amount
// of data that gets sent to UKM. Note that this is independent of what
// surfaces are considered "active". This set is reset periodically to ensure
// we get a variety of different measurements.
//
// * The PRNG seed that we use for various pseudo-random operations.
//
// * The study generation.
......@@ -70,15 +63,6 @@ class IdentifiabilityStudyState {
// instances are to be constructed.
static void ResetStateForTesting();
// Returns true if tracking metrics should be recorded for this
// source_id/surface combination.
bool ShouldRecordSurface(uint64_t source_id,
blink::IdentifiableSurface surface);
// Clear the sampled surface state from the state tracker. Ideally this would
// be called each time a UKM report generated.
void ResetRecordedSurfaces();
// A knob that we can use to split data sets from different versions of the
// implementation where the differences could have material effects on the
// data distribution.
......@@ -218,19 +202,6 @@ class IdentifiabilityStudyState {
// * max_active_surfaces_ ≤ kIdentifiabilityStudyMaxSurfaces.
const size_t max_active_surfaces_;
// Set of identifiable surfaces for which we record when the site makes
// measurement surfaces. This set is
// updated as we go unless it is already saturated, and resets itself
// periodically.
//
// Invariants:
//
// * tracked_surfaces_ ∩ settings_.blocked_surfaces() = Ø.
//
// * tracked_surfaces_.GetType() ∩ settings_.blocked_types() = Ø.
//
SampledSurfaceTracker tracked_surfaces_;
SEQUENCE_CHECKER(sequence_checker_);
};
......
......@@ -8,14 +8,10 @@
#include <memory>
#include "base/containers/flat_set.h"
#include "base/rand_util.h"
#include "base/stl_util.h"
#include "base/time/time.h"
#include "chrome/browser/privacy_budget/sampled_surface_tracker.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/mojom/ukm_interface.mojom.h"
#include "third_party/blink/public/common/privacy_budget/identifiability_study_settings.h"
#include "third_party/blink/public/common/privacy_budget/identifiable_surface.h"
PrivacyBudgetUkmEntryFilter::PrivacyBudgetUkmEntryFilter(
IdentifiabilityStudyState* state)
......@@ -35,40 +31,13 @@ bool PrivacyBudgetUkmEntryFilter::FilterEntry(
if (!enabled || entry->metrics.empty())
return false;
std::vector<blink::IdentifiableSurface> sampled_surfaces;
sampled_surfaces.reserve(entry->metrics.size());
base::EraseIf(entry->metrics, [&](auto metric) {
const auto surface =
blink::IdentifiableSurface::FromMetricHash(metric.first);
// Exclude the set that are blocked from all measurements.
if (!blink::IdentifiabilityStudySettings::Get()->IsSurfaceAllowed(surface))
return true;
// Record the set of surfaces sampled by the site.
if (identifiability_study_state_->ShouldRecordSurface(entry->source_id,
surface))
sampled_surfaces.push_back(surface);
// Exclude the set that are disabled for this user
return !identifiability_study_state_->ShouldSampleSurface(surface);
return !identifiability_study_state_->ShouldSampleSurface(
blink::IdentifiableSurface::FromMetricHash(metric.first));
});
uint64_t sample_idx = 0;
for (const auto& v : sampled_surfaces) {
// Add entries marking the surfaces that were sampled by the source as
// sampled.
blink::IdentifiableSurface s = blink::IdentifiableSurface::FromTypeAndInput(
blink::IdentifiableSurface::Type::kMeasuredSurface, sample_idx++);
entry->metrics.insert_or_assign(entry->metrics.end(), s.ToUkmMetricHash(),
v.ToUkmMetricHash());
}
// Identifiability metrics can leak information simply by being measured.
// Hence the metrics that are filtered out aren't returning in
// |removed_metric_hashes|.
return !entry->metrics.empty();
}
void PrivacyBudgetUkmEntryFilter::OnStoreRecordingsInReport() const {
identifiability_study_state_->ResetRecordedSurfaces();
}
......@@ -27,7 +27,6 @@ class PrivacyBudgetUkmEntryFilter : public ukm::UkmEntryFilter {
bool FilterEntry(
ukm::mojom::UkmEntry* entry,
base::flat_set<uint64_t>* removed_metric_hashes) const override;
void OnStoreRecordingsInReport() const override;
private:
IdentifiabilityStudyState* const identifiability_study_state_;
......
......@@ -14,7 +14,6 @@
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/mojom/ukm_interface.mojom.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/common/privacy_budget/identifiable_surface.h"
namespace {
......@@ -92,12 +91,6 @@ TEST(PrivacyBudgetUkmEntryFilterStandaloneTest, BlockListedMetrics) {
base::flat_map<uint64_t, int64_t> metrics = {{kBlockedSurface, 1},
{kUnblockedSurface, 2}};
base::flat_map<uint64_t, int64_t> expected_metrics = {
{kUnblockedSurface, 2},
{blink::IdentifiableSurface::FromTypeAndInput(
blink::IdentifiableSurface::Type::kMeasuredSurface, 0)
.ToUkmMetricHash(),
static_cast<int64_t>(kUnblockedSurface)}};
ukm::mojom::UkmEntryPtr x(base::in_place, 1,
ukm::builders::Identifiability::kEntryNameHash,
metrics);
......@@ -106,5 +99,6 @@ TEST(PrivacyBudgetUkmEntryFilterStandaloneTest, BlockListedMetrics) {
base::flat_set<uint64_t> filtered;
EXPECT_TRUE(filter->FilterEntry(x.get(), &filtered));
EXPECT_TRUE(filtered.empty());
EXPECT_EQ(expected_metrics, x->metrics);
ASSERT_EQ(1u, x->metrics.size());
EXPECT_EQ(kUnblockedSurface, x->metrics.begin()->first);
}
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/privacy_budget/sampled_surface_tracker.h"
#include <vector>
#include "base/rand_util.h"
#include "base/time/time.h"
#include "chrome/browser/privacy_budget/identifiability_study_state.h"
#include "third_party/blink/public/common/privacy_budget/identifiable_surface.h"
namespace {
uint64_t hash(uint64_t x) {
// This is should be a reasonable and fast reversible hash.
// Based a bit on https://naml.us/post/inverse-of-a-hash-function/
x = x * 88388617; // calculate as (x<<23)+(x<<3)+x
x = x ^ (x >> 31);
x = x * 257; // calculate as (x<<8)+x
x = x ^ (x >> 13);
x = x * 769; // calculate as (x<<9)+(x<<8)+x
x = x ^ (x >> 23);
x = x * 127; // calculate as (x<<7)-x
return x;
}
} // namespace
const unsigned SampledSurfaceTracker::kMaxTrackedSurfaces;
SampledSurfaceTracker::SampledSurfaceTracker() = default;
SampledSurfaceTracker::~SampledSurfaceTracker() = default;
void SampledSurfaceTracker::Reset() {
seed_ = base::RandUint64();
surfaces_.clear();
}
bool SampledSurfaceTracker::ShouldRecord(uint64_t source_id, uint64_t surface) {
if (blink::IdentifiableSurface::FromMetricHash(surface).GetType() ==
blink::IdentifiableSurface::Type::kReservedInternal)
return false;
HashKey key = hash(surface ^ seed_);
auto it = surfaces_.find(key);
if (it == surfaces_.end()) {
if (surfaces_.size() >= kMaxTrackedSurfaces &&
key <= surfaces_.begin()->first) {
return false;
}
// We need to add an entry for this surface, possibly bumping an entry out.
surfaces_.insert(std::make_pair(key, base::flat_set<uint64_t>{source_id}));
if (surfaces_.size() > kMaxTrackedSurfaces) {
// Remove the smallest
surfaces_.erase(surfaces_.begin());
}
return true;
}
if (it->second.contains(source_id))
return false;
it->second.insert(source_id);
return true;
}
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_PRIVACY_BUDGET_SAMPLED_SURFACE_TRACKER_H_
#define CHROME_BROWSER_PRIVACY_BUDGET_SAMPLED_SURFACE_TRACKER_H_
#include <map>
#include "base/containers/flat_set.h"
#include "base/time/time.h"
class SampledSurfaceTracker {
public:
// Maximum number of surfaces that this class can track. Prevents unbounded
// memory growth.
static constexpr unsigned kMaxTrackedSurfaces = 1000;
SampledSurfaceTracker();
~SampledSurfaceTracker();
bool ShouldRecord(uint64_t source_id, uint64_t surface);
void Reset();
private:
using HashKey = uint64_t;
// We use std::map since it makes it fast to remove the minimum.
std::map<HashKey, base::flat_set<uint64_t>> surfaces_;
uint64_t seed_;
};
#endif // CHROME_BROWSER_PRIVACY_BUDGET_SAMPLED_SURFACE_TRACKER_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/privacy_budget/sampled_surface_tracker.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/common/privacy_budget/identifiable_surface.h"
namespace {
uint64_t metric(int i) {
return blink::IdentifiableSurface::FromTypeAndInput(
blink::IdentifiableSurface::Type::kWebFeature, i)
.ToUkmMetricHash();
}
} // namespace
TEST(SampledSurfaceTrackerTest, Dedup) {
SampledSurfaceTracker t;
EXPECT_TRUE(t.ShouldRecord(0, metric(1)));
EXPECT_FALSE(t.ShouldRecord(0, metric(1)));
EXPECT_TRUE(t.ShouldRecord(1, metric(1)));
}
TEST(SampledSurfaceTrackerTest, SizeLimit) {
SampledSurfaceTracker t;
for (uint64_t i = 0; i < SampledSurfaceTracker::kMaxTrackedSurfaces; i++) {
EXPECT_TRUE(t.ShouldRecord(0, metric(i))) << ": 0," << i;
}
// Now the tracker should be full, but we will still allow new sources.
for (uint64_t i = 0; i < SampledSurfaceTracker::kMaxTrackedSurfaces; i++) {
EXPECT_FALSE(t.ShouldRecord(0, metric(i))) << ": 0," << i;
EXPECT_TRUE(t.ShouldRecord(1, metric(i))) << ": 1," << i;
}
// Add an extra one. This should bump one of the surfaces out.
t.ShouldRecord(0, SampledSurfaceTracker::kMaxTrackedSurfaces + 1);
// We expect only kMaxTrackedSurfaces to return true for a new surface.
unsigned num_true = 0;
for (uint64_t i = 0; i < SampledSurfaceTracker::kMaxTrackedSurfaces + 1;
i++) {
if (t.ShouldRecord(2, metric(i)))
num_true++;
}
EXPECT_EQ(SampledSurfaceTracker::kMaxTrackedSurfaces, num_true);
}
TEST(SampledSurfaceTrackerTest, Reset) {
SampledSurfaceTracker t;
EXPECT_TRUE(t.ShouldRecord(0, metric(0))) << ": 0,0";
EXPECT_FALSE(t.ShouldRecord(0, metric(0))) << ": 0,0";
t.Reset();
EXPECT_TRUE(t.ShouldRecord(0, metric(0))) << ": 0,0";
}
TEST(SampledSurfaceTrackerTest, InvalidMetric) {
SampledSurfaceTracker t;
EXPECT_FALSE(t.ShouldRecord(
0, blink::IdentifiableSurface::FromTypeAndInput(
blink::IdentifiableSurface::Type::kReservedInternal, 1)
.ToUkmMetricHash()));
}
......@@ -97,10 +97,6 @@ class IdentifiableSurface {
// Extension running content-script.
kExtensionContentScript = 6,
// Represents making a measurement of one of the above surfacess. This
// metric is retained even if filtering discards the surface.
kMeasuredSurface = 7,
// We can use values up to and including |kMax|.
kMax = (1 << kTypeBits) - 1
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment