Commit 39255c61 authored by Yue Ru Sun's avatar Yue Ru Sun Committed by Commit Bot

Purge extension URL recordings on sync disabled

When the extension sync is disabled, purge all sources whose URL has `chrome-extension` scheme and all events from these sources, from both in-memory recordings as well as compressed reports in the unsent log store.

Bug: 948881
Change-Id: I67bc5bd7c4a0b9c9ed02353dddceaf090eb655c4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1829953
Commit-Queue: Yue Ru Sun <yrsun@chromium.org>
Reviewed-by: default avatarAlexei Svitkine <asvitkine@chromium.org>
Reviewed-by: default avatarRobert Kaplow <rkaplow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#707849}
parent 52fe599d
......@@ -1024,7 +1024,10 @@ void ChromeMetricsServiceClient::OnSyncPrefsChanged(bool must_purge) {
if (must_purge) {
ukm_service_->Purge();
ukm_service_->ResetClientState(ukm::ResetReason::kOnSyncPrefsChanged);
} else if (!SyncStateAllowsExtensionUkm()) {
ukm_service_->PurgeExtensions();
}
// Signal service manager to enable/disable UKM based on new state.
UpdateRunningServices();
}
......
......@@ -133,6 +133,7 @@ jumbo_static_library("metrics") {
public_deps = [
"//third_party/metrics_proto",
"//third_party/zlib/google:compression_utils",
]
deps = [
......@@ -144,7 +145,6 @@ jumbo_static_library("metrics") {
"//components/version_info:version_info",
"//crypto",
"//extensions/buildflags",
"//third_party/zlib/google:compression_utils",
"//url",
]
......
......@@ -167,6 +167,28 @@ void UnsentLogStore::StoreLog(const std::string& log_data) {
signing_key_);
}
const std::string& UnsentLogStore::GetLogAtIndex(size_t index) {
DCHECK_GE(index, 0U);
DCHECK_LT(index, list_.size());
return list_[index].compressed_log_data;
}
std::string UnsentLogStore::ReplaceLogAtIndex(size_t index,
const std::string& new_log_data) {
DCHECK_GE(index, 0U);
DCHECK_LT(index, list_.size());
// Avoid copying of long strings.
std::string old_log_data;
old_log_data.swap(list_[index].compressed_log_data);
std::string old_timestamp;
old_timestamp.swap(list_[index].timestamp);
list_[index] = LogInfo();
list_[index].Init(metrics_.get(), new_log_data, old_timestamp, signing_key_);
return old_log_data;
}
void UnsentLogStore::Purge() {
if (has_staged_log()) {
DiscardStagedLog();
......
......@@ -62,7 +62,14 @@ class UnsentLogStore : public LogStore {
// Adds a log to the list.
void StoreLog(const std::string& log_data);
// Delete all logs, in memory and on disk.
// Gets log data at the given index in the list.
const std::string& GetLogAtIndex(size_t index);
// Replaces the compressed log at |index| in the store with given log data
// reusing the same timestamp from the original log, and returns old log data.
std::string ReplaceLogAtIndex(size_t index, const std::string& new_log_data);
// Deletes all logs, in memory and on disk.
void Purge();
// Returns the timestamp of the element in the front of the list.
......
......@@ -11,8 +11,8 @@ static_library("ukm") {
sources = [
"app_source_url_recorder.cc",
"app_source_url_recorder.h",
"unsent_log_store_metrics_impl.cc",
"unsent_log_store_metrics_impl.h",
"scheme_constants.cc",
"scheme_constants.h",
"ukm_pref_names.cc",
"ukm_pref_names.h",
"ukm_recorder_impl.cc",
......@@ -23,6 +23,8 @@ static_library("ukm") {
"ukm_rotation_scheduler.h",
"ukm_service.cc",
"ukm_service.h",
"unsent_log_store_metrics_impl.cc",
"unsent_log_store_metrics_impl.h",
]
public_deps = [
......
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/ukm/scheme_constants.h"
namespace ukm {
const char kAppScheme[] = "app";
const char kChromeUIScheme[] = "chrome";
const char kExtensionScheme[] = "chrome-extension";
} // namespace ukm
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_UKM_SCHEME_CONSTANTS_H_
#define COMPONENTS_UKM_SCHEME_CONSTANTS_H_
namespace ukm {
// Defines several URL scheme constants to avoid dependencies.
// kAppScheme will be defined in code that isn't available here.
extern const char kAppScheme[];
// kChromeUIScheme is defined in content, which this code can't depend on
// since it's used by iOS too.
extern const char kChromeUIScheme[];
// kExtensionScheme is defined in extensions which also isn't available here.
extern const char kExtensionScheme[];
} // namespace ukm
#endif // COMPONENTS_UKM_SCHEME_CONSTANTS_H_
......@@ -7,6 +7,7 @@
#include <stddef.h>
#include <map>
#include <memory>
#include <set>
#include <utility>
......
......@@ -18,6 +18,7 @@
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "components/ukm/scheme_constants.h"
#include "components/variations/variations_associated_data.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_decode.h"
......@@ -33,14 +34,6 @@ namespace ukm {
namespace {
// Note: kChromeUIScheme is defined in content, which this code can't
// depend on - since it's used by iOS too. kExtensionScheme is defined
// in extensions which also isn't always available here. kAppScheme
// will be defined in code that isn't available here.
const char kChromeUIScheme[] = "chrome";
const char kExtensionScheme[] = "chrome-extension";
const char kAppScheme[] = "app";
const base::Feature kUkmSamplingRateFeature{"UkmSamplingRate",
base::FEATURE_DISABLED_BY_DEFAULT};
......@@ -159,7 +152,7 @@ void AppendWhitelistedUrls(
}
}
bool HasUnknownMetrics(const ukm::builders::DecodeMap& decode_map,
bool HasUnknownMetrics(const builders::DecodeMap& decode_map,
const mojom::UkmEntry& entry) {
const auto it = decode_map.find(entry.event_hash);
if (it == decode_map.end())
......@@ -260,6 +253,32 @@ void UkmRecorderImpl::Purge() {
recording_is_continuous_ = false;
}
void UkmRecorderImpl::PurgeExtensionRecordings() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// Discard all sources that have an extension URL as well as all the entries
// related to any of these sources.
std::unordered_set<SourceId> extension_source_ids;
for (const auto& kv : recordings_.sources) {
if (kv.second->url().SchemeIs(kExtensionScheme)) {
extension_source_ids.insert(kv.first);
}
}
for (const auto source_id : extension_source_ids) {
recordings_.sources.erase(source_id);
}
std::vector<mojom::UkmEntryPtr>& events = recordings_.entries;
events.erase(
std::remove_if(events.begin(), events.end(),
[&](const auto& event) {
return extension_source_ids.count(event->source_id);
}),
events.end());
recording_is_continuous_ = false;
}
void UkmRecorderImpl::MarkSourceForDeletion(SourceId source_id) {
if (source_id == kInvalidSourceId)
return;
......@@ -295,7 +314,7 @@ void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
// Number of sources discarded due to not matching a navigation URL.
int num_sources_unmatched = 0;
std::unordered_map<ukm::SourceIdType, int> serialized_source_type_counts;
std::unordered_map<SourceIdType, int> serialized_source_type_counts;
for (const auto& kv : recordings_.sources) {
// Don't keep sources of these types after current report because their
......@@ -385,15 +404,14 @@ void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnmatchedSourcesCount",
num_sources_unmatched);
UMA_HISTOGRAM_COUNTS_1000(
"UKM.Sources.SerializedCount2.Ukm",
serialized_source_type_counts[ukm::SourceIdType::UKM]);
UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount2.Ukm",
serialized_source_type_counts[SourceIdType::UKM]);
UMA_HISTOGRAM_COUNTS_1000(
"UKM.Sources.SerializedCount2.Navigation",
serialized_source_type_counts[ukm::SourceIdType::NAVIGATION_ID]);
serialized_source_type_counts[SourceIdType::NAVIGATION_ID]);
UMA_HISTOGRAM_COUNTS_1000(
"UKM.Sources.SerializedCount2.App",
serialized_source_type_counts[ukm::SourceIdType::APP_ID]);
serialized_source_type_counts[SourceIdType::APP_ID]);
// We record a UMA metric specifically for the number of serialized events
// with the FCP metric. This is for data quality verification.
......@@ -482,12 +500,10 @@ int UkmRecorderImpl::PruneOldSources(size_t max_kept_sources) {
if (recordings_.sources.size() <= max_kept_sources)
return 0;
std::vector<std::pair<base::TimeTicks, ukm::SourceId>>
timestamp_source_id_pairs;
std::vector<std::pair<base::TimeTicks, SourceId>> timestamp_source_id_pairs;
for (const auto& kv : recordings_.sources) {
timestamp_source_id_pairs.push_back(
std::pair<base::TimeTicks, ukm::SourceId>(kv.second->creation_time(),
kv.first));
std::make_pair(kv.second->creation_time(), kv.first));
}
// Partially sort so that the last |max_kept_sources| elements are the
// newest.
......@@ -740,7 +756,7 @@ void UkmRecorderImpl::StoreWhitelistedEntries() {
base::SPLIT_WANT_NONEMPTY);
for (const auto& entry_string : entries)
whitelisted_entry_hashes_.insert(base::HashMetricName(entry_string));
decode_map_ = ::ukm::builders::CreateDecodeMap();
decode_map_ = builders::CreateDecodeMap();
}
} // namespace ukm
......@@ -68,6 +68,9 @@ class UkmRecorderImpl : public UkmRecorder {
// Deletes stored recordings.
void Purge();
// Deletes stored recordings related to Chrome extensions.
void PurgeExtensionRecordings();
// Marks a source as no longer needed to be kept alive in memory. The source
// with given id will be removed from in-memory recordings at the next
// reporting cycle.
......@@ -131,6 +134,7 @@ class UkmRecorderImpl : public UkmRecorder {
friend ::ukm::UkmRecorderImplTest;
friend ::ukm::UkmUtilsForTest;
FRIEND_TEST_ALL_PREFIXES(UkmRecorderImplTest, IsSampledIn);
FRIEND_TEST_ALL_PREFIXES(UkmRecorderImplTest, PurgeExtensionRecordings);
struct MetricAggregate {
uint64_t total_count = 0;
......
......@@ -7,14 +7,18 @@
#include "base/bind.h"
#include "base/metrics/ukm_source_id.h"
#include "base/test/scoped_feature_list.h"
#include "components/ukm/test_ukm_recorder.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_entry_builder.h"
#include "services/metrics/public/cpp/ukm_source.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/metrics_proto/ukm/report.pb.h"
#include "url/gurl.h"
namespace ukm {
using TestEvent1 = builders::PageLoad;
TEST(UkmRecorderImplTest, IsSampledIn) {
UkmRecorderImpl impl;
......@@ -56,4 +60,50 @@ TEST(UkmRecorderImplTest, IsSampledIn) {
EXPECT_FALSE(impl.IsSampledIn(4, 2, 2));
}
TEST(UkmRecorderImplTest, PurgeExtensionRecordings) {
TestUkmRecorder recorder;
// Enable extension sync.
recorder.SetIsWebstoreExtensionCallback(
base::BindRepeating([](base::StringPiece) { return true; }));
// Record some sources and events.
SourceId id1 = ConvertToSourceId(1, SourceIdType::NAVIGATION_ID);
recorder.UpdateSourceURL(id1, GURL("https://www.google.ca"));
SourceId id2 = ConvertToSourceId(2, SourceIdType::NAVIGATION_ID);
recorder.UpdateSourceURL(id2, GURL("chrome-extension://abc/manifest.json"));
SourceId id3 = ConvertToSourceId(3, SourceIdType::NAVIGATION_ID);
recorder.UpdateSourceURL(id3, GURL("http://www.wikipedia.org"));
SourceId id4 = ConvertToSourceId(4, SourceIdType::NAVIGATION_ID);
recorder.UpdateSourceURL(id4, GURL("chrome-extension://abc/index.html"));
TestEvent1(id1).Record(&recorder);
TestEvent1(id2).Record(&recorder);
// All sources and events have been recorded.
EXPECT_TRUE(recorder.extensions_enabled_);
EXPECT_TRUE(recorder.recording_is_continuous_);
EXPECT_EQ(4U, recorder.sources().size());
EXPECT_EQ(2U, recorder.entries().size());
recorder.PurgeExtensionRecordings();
// Recorded sources of extension scheme and related events have been cleared.
EXPECT_EQ(2U, recorder.sources().size());
EXPECT_EQ(1U, recorder.sources().count(id1));
EXPECT_EQ(0U, recorder.sources().count(id2));
EXPECT_EQ(1U, recorder.sources().count(id3));
EXPECT_EQ(0U, recorder.sources().count(id4));
EXPECT_FALSE(recorder.recording_is_continuous_);
EXPECT_EQ(1U, recorder.entries().size());
EXPECT_EQ(id1, recorder.entries()[0]->source_id);
// Recording is disabled for extensions, thus new extension URL will not be
// recorded.
recorder.EnableRecording(/* extensions = */ false);
recorder.UpdateSourceURL(id4, GURL("chrome-extension://abc/index.html"));
EXPECT_FALSE(recorder.extensions_enabled_);
EXPECT_EQ(2U, recorder.sources().size());
}
} // namespace ukm
......@@ -111,4 +111,4 @@ void UkmReportingService::LogSuccess(size_t log_size) {
void UkmReportingService::LogLargeRejection(size_t log_size) {}
} // namespace metrics
} // namespace ukm
......@@ -6,6 +6,7 @@
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include "base/bind.h"
......@@ -21,11 +22,13 @@
#include "components/metrics/ukm_demographic_metrics_provider.h"
#include "components/prefs/pref_registry_simple.h"
#include "components/prefs/pref_service.h"
#include "components/ukm/scheme_constants.h"
#include "components/ukm/ukm_pref_names.h"
#include "components/ukm/ukm_rotation_scheduler.h"
#include "services/metrics/public/cpp/delegating_ukm_recorder.h"
#include "third_party/metrics_proto/ukm/report.pb.h"
#include "third_party/metrics_proto/user_demographics.pb.h"
#include "third_party/zlib/google/compression_utils.h"
namespace ukm {
......@@ -73,6 +76,94 @@ int32_t LoadAndIncrementSessionId(PrefService* pref_service) {
return session_id;
}
// Remove elements satisfying the predicate by moving them to the end of the
// list then truncate.
template <typename Predicate, typename ReadElements, typename WriteElements>
void FilterReportElements(Predicate predicate,
const ReadElements& elements,
WriteElements* mutable_elements) {
if (elements.empty())
return;
int entries_size = elements.size();
int start = 0;
int end = entries_size - 1;
while (start < end) {
while (start < entries_size && !predicate(elements.Get(start))) {
start++;
}
while (end >= 0 && predicate(elements.Get(end))) {
end--;
}
if (start < end) {
mutable_elements->SwapElements(start, end);
start++;
end--;
}
}
mutable_elements->DeleteSubrange(start, entries_size - start);
}
void PurgeExtensionDataFromUnsentLogStore(
metrics::UnsentLogStore* ukm_log_store) {
for (size_t index = 0; index < ukm_log_store->size(); index++) {
// Uncompress log data from store back into a Report.
const std::string& compressed_log_data =
ukm_log_store->GetLogAtIndex(index);
std::string uncompressed_log_data;
const bool uncompress_successful = compression::GzipUncompress(
compressed_log_data, &uncompressed_log_data);
DCHECK(uncompress_successful);
Report report;
const bool report_parse_successful =
report.ParseFromString(uncompressed_log_data);
DCHECK(report_parse_successful);
std::unordered_set<SourceId> extension_source_ids;
// Grab all extension-related source ids.
for (const auto& source : report.sources()) {
// Check if any URL on the source has extension scheme. It is possible
// that only one of multiple URLs does due to redirect, in this case, we
// should still purge the source.
for (const auto& url_info : source.urls()) {
if (GURL(url_info.url()).SchemeIs(kExtensionScheme)) {
extension_source_ids.insert(source.id());
break;
}
}
}
if (extension_source_ids.empty())
continue;
// Remove all extension-related sources from the report.
FilterReportElements(
[&](const Source& element) {
return extension_source_ids.count(element.id());
},
report.sources(), report.mutable_sources());
// Remove all entries originating from extension-related sources.
FilterReportElements(
[&](const Entry& element) {
return extension_source_ids.count(element.source_id());
},
report.entries(), report.mutable_entries());
std::string reserialized_log_data;
report.SerializeToString(&reserialized_log_data);
// Replace the compressed log in the store by its filtered version.
const std::string old_compressed_log_data =
ukm_log_store->ReplaceLogAtIndex(index, reserialized_log_data);
// Reached here only if extensions were found in the log, so data should now
// be different after filtering.
DCHECK(ukm_log_store->GetLogAtIndex(index) != old_compressed_log_data);
}
}
} // namespace
UkmService::UkmService(PrefService* pref_service,
......@@ -99,8 +190,8 @@ UkmService::UkmService(PrefService* pref_service,
const base::Callback<base::TimeDelta(void)>& get_upload_interval_callback =
base::Bind(&metrics::MetricsServiceClient::GetUploadInterval,
base::Unretained(client_));
scheduler_.reset(new ukm::UkmRotationScheduler(rotate_callback,
get_upload_interval_callback));
scheduler_.reset(
new UkmRotationScheduler(rotate_callback, get_upload_interval_callback));
StoreWhitelistedEntries();
......@@ -196,6 +287,16 @@ void UkmService::Purge() {
UkmRecorderImpl::Purge();
}
void UkmService::PurgeExtensions() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DVLOG(1) << "UkmService::PurgeExtensions";
// Filter out any extension-related data from the serialized logs in the
// UnsentLogStore for uploading.
PurgeExtensionDataFromUnsentLogStore(reporting_service_.ukm_log_store());
// Purge data currently in the recordings intended for the next ukm::Report.
UkmRecorderImpl::PurgeExtensionRecordings();
}
void UkmService::ResetClientState(ResetReason reason) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
......
......@@ -88,6 +88,9 @@ class UkmService : public UkmRecorderImpl {
// Deletes any unsent local data.
void Purge();
// Deletes any unsent local data related to Chrome extensions.
void PurgeExtensions();
// Resets the client prefs (client_id/session_id). |reason| should be passed
// to provide the reason of the reset - this is only used for UMA logging.
void ResetClientState(ResetReason reason);
......@@ -121,6 +124,8 @@ class UkmService : public UkmRecorderImpl {
TestRegisterUKMProvidersWhenForceMetricsReporting);
FRIEND_TEST_ALL_PREFIXES(::IOSChromeMetricsServiceClientTest,
TestRegisterUKMProvidersWhenUKMFeatureEnabled);
FRIEND_TEST_ALL_PREFIXES(UkmServiceTest,
PurgeExtensionDataFromUnsentLogStore);
// Starts metrics client initialization.
void StartInitTask();
......
......@@ -249,6 +249,70 @@ TEST_F(UkmServiceTest, Purge) {
EXPECT_EQ(0, GetPersistedLogCount());
}
TEST_F(UkmServiceTest, PurgeExtensionDataFromUnsentLogStore) {
UkmService service(&prefs_, &client_,
true /* restrict_to_whitelisted_entries */,
std::make_unique<MockDemographicMetricsProvider>());
auto* unsent_log_store = service.reporting_service_.ukm_log_store();
// Initialize a Report to be saved to the log store.
ukm::Report report;
report.set_client_id(1);
report.set_session_id(1);
report.set_report_id(1);
std::string non_extension_url = "https://www.google.ca";
std::string extension_url =
"chrome-extension://bmnlcjabgnpnenekpadlanbbkooimhnj/manifest.json";
// Add both extension- and non-extension-related sources to the Report.
ukm::Source* proto_source_1 = report.add_sources();
ukm::SourceId source_id_1 =
ukm::ConvertToSourceId(1, ukm::SourceIdType::NAVIGATION_ID);
proto_source_1->set_id(source_id_1);
proto_source_1->add_urls()->set_url(non_extension_url);
ukm::Source* proto_source_2 = report.add_sources();
ukm::SourceId source_id_2 =
ukm::ConvertToSourceId(2, ukm::SourceIdType::NAVIGATION_ID);
proto_source_2->set_id(source_id_2);
proto_source_2->add_urls()->set_url(extension_url);
// Add some entries for both sources.
ukm::Entry* entry_1 = report.add_entries();
entry_1->set_source_id(source_id_2);
ukm::Entry* entry_2 = report.add_entries();
entry_2->set_source_id(source_id_1);
ukm::Entry* entry_3 = report.add_entries();
entry_3->set_source_id(source_id_2);
// Save the Report to the store.
std::string serialized_log;
report.SerializeToString(&serialized_log);
unsent_log_store->StoreLog(serialized_log);
// Do extension purging.
service.PurgeExtensions();
// Get the Report in the log store and verify extension-related data have been
// filtered.
unsent_log_store->StageNextLog();
const std::string& compressed_log_data = unsent_log_store->staged_log();
std::string uncompressed_log_data;
compression::GzipUncompress(compressed_log_data, &uncompressed_log_data);
ukm::Report filtered_report;
filtered_report.ParseFromString(uncompressed_log_data);
// Only proto_source_1 with non-extension URL is kept.
EXPECT_EQ(1, filtered_report.sources_size());
EXPECT_EQ(source_id_1, filtered_report.sources(0).id());
EXPECT_EQ(non_extension_url, filtered_report.sources(0).urls(0).url());
// Only entry_2 from the non-extension source is kept.
EXPECT_EQ(1, filtered_report.entries_size());
EXPECT_EQ(source_id_1, filtered_report.entries(0).source_id());
}
TEST_F(UkmServiceTest, SourceSerialization) {
UkmService service(&prefs_, &client_,
true /* restrict_to_whitelisted_entries */,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment