Commit 880b6677 authored by Tarun Bansal's avatar Tarun Bansal Committed by Commit Bot

Remove usage of minimum site engagement threshold in hints fetcher

Remove usage of minimum site engagement threshold in hints fetcher
only if the data saver top host blocklist was generated long time
back.

If the blocklist was generated long time back, we expect the
hosts that did not make it to blocklist would be either
navigated to or would be dropped off by the site engagement service.

Change-Id: I97d19fb0c9a621e286b94a5e266ad0edb5238133
Bug: 1003432
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1800740Reviewed-by: default avatarMichael Crouse <mcrouse@chromium.org>
Commit-Queue: Tarun Bansal <tbansal@chromium.org>
Cr-Commit-Position: refs/heads/master@{#696173}
parent aa949b89
......@@ -7,6 +7,7 @@
#include <algorithm>
#include "base/metrics/histogram_macros.h"
#include "base/time/default_clock.h"
#include "base/values.h"
#include "chrome/browser/engagement/site_engagement_details.mojom.h"
#include "chrome/browser/engagement/site_engagement_score.h"
......@@ -25,6 +26,7 @@
namespace {
// Returns true if hints can't be fetched for |host|.
bool IsHostBlacklisted(const base::DictionaryValue* top_host_blacklist,
const std::string& host) {
if (!top_host_blacklist)
......@@ -58,14 +60,18 @@ bool IsPermittedToUseTopHostProvider(content::BrowserContext* browser_context) {
std::unique_ptr<DataSaverTopHostProvider>
DataSaverTopHostProvider::CreateIfAllowed(
content::BrowserContext* browser_context) {
if (IsPermittedToUseTopHostProvider(browser_context))
return std::make_unique<DataSaverTopHostProvider>(browser_context);
if (IsPermittedToUseTopHostProvider(browser_context)) {
return std::make_unique<DataSaverTopHostProvider>(
browser_context, base::DefaultClock::GetInstance());
}
return nullptr;
}
DataSaverTopHostProvider::DataSaverTopHostProvider(
content::BrowserContext* browser_context)
content::BrowserContext* browser_context,
base::Clock* time_clock)
: browser_context_(browser_context),
time_clock_(time_clock),
pref_service_(Profile::FromBrowserContext(browser_context_)->GetPrefs()) {
}
......@@ -100,6 +106,10 @@ void DataSaverTopHostProvider::InitializeHintsFetcherTopHostBlacklist() {
return lhs.total_score > rhs.total_score;
});
pref_service_->SetDouble(
optimization_guide::prefs::kTimeBlacklistLastInitialized,
time_clock_->Now().ToDeltaSinceWindowsEpoch().InSecondsF());
for (const auto& detail : engagement_details) {
if (top_host_blacklist->size() >=
optimization_guide::features::MaxHintsFetcherTopHostBlacklistSize()) {
......@@ -217,6 +227,18 @@ std::vector<std::string> DataSaverTopHostProvider::GetTopHosts(
DCHECK(browser_context_);
DCHECK(pref_service_);
// It's possible that the blacklist is initialized but
// kTimeBlacklistLastInitialized pref is not populated. This may happen since
// the logic to populate kTimeBlacklistLastInitialized pref was added in a
// later Chrome version. In that case, set kTimeBlacklistLastInitialized to
// the conservative value of current time.
if (pref_service_->GetDouble(
optimization_guide::prefs::kTimeBlacklistLastInitialized) == 0) {
pref_service_->SetDouble(
optimization_guide::prefs::kTimeBlacklistLastInitialized,
time_clock_->Now().ToDeltaSinceWindowsEpoch().InSecondsF());
}
if (GetCurrentBlacklistState() ==
optimization_guide::prefs::HintsFetcherTopHostBlacklistState::
kNotInitialized) {
......@@ -263,21 +285,38 @@ std::vector<std::string> DataSaverTopHostProvider::GetTopHosts(
return lhs.total_score > rhs.total_score;
});
base::Time blacklist_initialized_time =
base::Time::FromDeltaSinceWindowsEpoch(
base::TimeDelta::FromSecondsD(pref_service_->GetDouble(
optimization_guide::prefs::kTimeBlacklistLastInitialized)));
base::TimeDelta duration_since_blacklist_initialized =
(time_clock_->Now() - blacklist_initialized_time);
for (const auto& detail : engagement_details) {
if (top_hosts.size() >= max_sites)
return top_hosts;
// TODO(b/968542): Skip origins that are local hosts (e.g., IP addresses,
// localhost:8080 etc.).
if (!detail.origin.SchemeIs(url::kHttpsScheme))
continue;
// Once the engagement score is less than the initial engagement score for a
// newly navigated host, return the current set of top hosts. This threshold
// prevents hosts that have not been engaged recently from having hints
// requested for them. The engagement_details are sorted above in descending
// order by engagement score.
if (detail.total_score <
optimization_guide::features::MinTopHostEngagementScoreThreshold())
// This filtering is applied only if the the blacklist was initialized
// recently. If the blacklist was initialized too far back in time, hosts
// that could not make it to blacklist should have either been navigated to
// or would have fallen off the blacklist.
if (duration_since_blacklist_initialized <=
optimization_guide::features::
DurationApplyLowEngagementScoreThreshold() &&
detail.total_score < optimization_guide::features::
MinTopHostEngagementScoreThreshold()) {
return top_hosts;
// TODO(b/968542): Skip origins that are local hosts (e.g., IP addresses,
// localhost:8080 etc.).
if (detail.origin.SchemeIs(url::kHttpsScheme) &&
!IsHostBlacklisted(top_host_blacklist, detail.origin.host())) {
}
if (!IsHostBlacklisted(top_host_blacklist, detail.origin.host())) {
top_hosts.push_back(detail.origin.host());
}
}
......
......@@ -11,6 +11,7 @@
#include "base/macros.h"
#include "base/sequence_checker.h"
#include "base/time/clock.h"
#include "base/values.h"
#include "components/optimization_guide/optimization_guide_prefs.h"
#include "components/optimization_guide/top_host_provider.h"
......@@ -31,7 +32,8 @@ class DataSaverTopHostProvider : public optimization_guide::TopHostProvider {
// OptimizationGuideKeyedService is fully rolled out. All future callers
// should be using the CreateIfAllowed() factory method instead, which
// validates if the user has the proper permissions to use this class.
explicit DataSaverTopHostProvider(content::BrowserContext* BrowserContext);
DataSaverTopHostProvider(content::BrowserContext* BrowserContext,
base::Clock* time_clock);
~DataSaverTopHostProvider() override;
// Creates a DataSaverTopHostProvider if the user is a Data Saver user and has
......@@ -71,6 +73,9 @@ class DataSaverTopHostProvider : public optimization_guide::TopHostProvider {
// of |this|.
content::BrowserContext* browser_context_;
// Clock used for getting current time.
base::Clock* time_clock_;
// |pref_service_| provides information about the current profile's
// settings. It is not owned and guaranteed to outlive |this|.
PrefService* pref_service_;
......
......@@ -4,6 +4,8 @@
#include "chrome/browser/data_saver/data_saver_top_host_provider.h"
#include "base/test/simple_test_clock.h"
#include "base/time/default_clock.h"
#include "base/values.h"
#include "chrome/browser/engagement/site_engagement_score.h"
#include "chrome/browser/engagement/site_engagement_service.h"
......@@ -28,8 +30,12 @@ class DataSaverTopHostProviderTest : public ChromeRenderViewHostTestHarness {
public:
void SetUp() override {
ChromeRenderViewHostTestHarness::SetUp();
// Advance by 1-day to avoid running into null checks.
test_clock_.Advance(base::TimeDelta::FromDays(1));
top_host_provider_ =
std::make_unique<DataSaverTopHostProvider>(profile(), &test_clock_);
top_host_provider_ = std::make_unique<DataSaverTopHostProvider>(profile());
service_ = SiteEngagementService::Get(profile());
pref_service_ = profile()->GetPrefs();
......@@ -149,6 +155,8 @@ class DataSaverTopHostProviderTest : public ChromeRenderViewHostTestHarness {
return top_host_provider_.get();
}
base::SimpleTestClock test_clock_;
private:
std::unique_ptr<DataSaverTopHostProvider> top_host_provider_;
std::unique_ptr<data_reduction_proxy::DataReductionProxyTestContext>
......@@ -395,21 +403,25 @@ TEST_F(DataSaverTopHostProviderTest, IntializeTopHostBlacklistWithMaxTopSites) {
TEST_F(DataSaverTopHostProviderTest, TopHostsFilteredByEngagementThreshold) {
size_t engaged_hosts =
optimization_guide::features::MaxHintsFetcherTopHostBlacklistSize() + 1;
size_t max_top_hosts =
optimization_guide::features::MaxHintsFetcherTopHostBlacklistSize() + 1;
// Set the count of maximum hosts requested to a large number so that the
// count itself does not prevent top_host_provider() from returning hosts that
// it would have otherwise returned.
static const size_t kMaxHostsRequested = INT16_MAX;
AddEngagedHosts(engaged_hosts);
// Add two hosts with very low engagement scores that should not be returned
// by the top host provider.
AddEngagedHost(GURL("https://lowengagement.com"), 1);
AddEngagedHost(GURL("https://lowengagement1.com"), 1);
AddEngagedHost(GURL("https://lowengagement2.com"), 1);
// Blacklist should be populated on the first request.
// Blacklist should be populated on the first request. Set the count of
// desired
std::vector<std::string> hosts =
top_host_provider()->GetTopHosts(max_top_hosts);
top_host_provider()->GetTopHosts(kMaxHostsRequested);
EXPECT_EQ(hosts.size(), 0u);
hosts = top_host_provider()->GetTopHosts(max_top_hosts);
hosts = top_host_provider()->GetTopHosts(kMaxHostsRequested);
EXPECT_EQ(
hosts.size(),
engaged_hosts -
......@@ -420,8 +432,37 @@ TEST_F(DataSaverTopHostProviderTest, TopHostsFilteredByEngagementThreshold) {
// The hosts with engagement scores below the minimum threshold should not be
// returned.
EXPECT_EQ(std::find(hosts.begin(), hosts.end(), "lowengagement.com"),
EXPECT_EQ(std::find(hosts.begin(), hosts.end(), "lowengagement1.com"),
hosts.end());
EXPECT_EQ(std::find(hosts.begin(), hosts.end(), "lowengagement2.com"),
hosts.end());
// Advance the clock by more than DurationApplyLowEngagementScoreThreshold().
// top_host_provider() should also return hosts with low engagement score.
test_clock_.Advance(
optimization_guide::features::DurationApplyLowEngagementScoreThreshold() +
base::TimeDelta::FromDays(1));
AddEngagedHost(GURL("https://lowengagement3.com"), 1);
AddEngagedHost(GURL("https://lowengagement4.com"), 1);
hosts = top_host_provider()->GetTopHosts(kMaxHostsRequested);
// Four hosts lowengagement[1-4] should now be present in |hosts|.
EXPECT_EQ(
engaged_hosts + 4 -
optimization_guide::features::MaxHintsFetcherTopHostBlacklistSize(),
hosts.size());
EXPECT_EQ(GetCurrentTopHostBlacklistState(),
optimization_guide::prefs::HintsFetcherTopHostBlacklistState::
kInitialized);
// The hosts with engagement scores below the minimum threshold should not be
// returned.
EXPECT_NE(std::find(hosts.begin(), hosts.end(), "lowengagement1.com"),
hosts.end());
EXPECT_NE(std::find(hosts.begin(), hosts.end(), "lowengagement2.com"),
hosts.end());
EXPECT_NE(std::find(hosts.begin(), hosts.end(), "lowengagement3.com"),
hosts.end());
EXPECT_NE(std::find(hosts.begin(), hosts.end(), "lowengagement4.com"),
hosts.end());
}
......@@ -164,8 +164,9 @@ PreviewsService::GetAllowedPreviews() {
}
PreviewsService::PreviewsService(content::BrowserContext* browser_context)
: top_host_provider_(
std::make_unique<DataSaverTopHostProvider>(browser_context)),
: top_host_provider_(std::make_unique<DataSaverTopHostProvider>(
browser_context,
base::DefaultClock::GetInstance())),
previews_lite_page_decider_(
std::make_unique<PreviewsLitePageDecider>(browser_context)),
previews_offline_helper_(
......
......@@ -92,6 +92,12 @@ base::TimeDelta StoredFetchedHintsFreshnessDuration() {
"max_store_duration_for_featured_hints_in_days", 7));
}
base::TimeDelta DurationApplyLowEngagementScoreThreshold() {
return base::TimeDelta::FromDays(GetFieldTrialParamByFeatureAsInt(
features::kOptimizationHintsFetching,
"duration_apply_low_engagement_score_threshold_in_days", 30));
}
std::string GetOptimizationGuideServiceAPIKey() {
// Command line override takes priority.
base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
......
......@@ -45,6 +45,12 @@ double MinTopHostEngagementScoreThreshold();
// to be used and remain in the HintCacheStore.
base::TimeDelta StoredFetchedHintsFreshnessDuration();
// The duration of time after the blacklist initialization for which the low
// engagement score threshold needs to be applied. If the blacklist was
// initialized more than DurationApplyLowEngagementScoreThreshold() ago, then
// the low engagement score threshold need not be applied.
base::TimeDelta DurationApplyLowEngagementScoreThreshold();
// The API key for the One Platform Optimization Guide Service.
std::string GetOptimizationGuideServiceAPIKey();
......
......@@ -28,6 +28,11 @@ const char kHintsFetcherDataSaverTopHostBlacklist[] =
const char kHintsFetcherDataSaverTopHostBlacklistState[] =
"optimization_guide.hintsfetcher.top_host_blacklist_state";
// Time when the blacklist was last initialized. Recorded as seconds since
// epoch.
const char kTimeBlacklistLastInitialized[] =
"optimization_guide.hintsfetcher.time_blacklist_last_initialized";
// A dictionary pref that stores hosts that have had hints successfully fetched
// from the remote Optimization Guide Server. The entry for each host contains
// the time that the fetch that covered this host expires, i.e., any hints
......@@ -55,6 +60,8 @@ void RegisterProfilePrefs(PrefRegistrySimple* registry) {
kHintsFetcherDataSaverTopHostBlacklistState,
static_cast<int>(HintsFetcherTopHostBlacklistState::kNotInitialized),
PrefRegistry::LOSSY_PREF);
registry->RegisterDoublePref(kTimeBlacklistLastInitialized, 0,
PrefRegistry::LOSSY_PREF);
registry->RegisterStringPref(kPendingHintsProcessingVersion, "",
PrefRegistry::LOSSY_PREF);
}
......
......@@ -15,6 +15,7 @@ namespace prefs {
extern const char kHintsFetcherLastFetchAttempt[];
extern const char kHintsFetcherDataSaverTopHostBlacklist[];
extern const char kHintsFetcherDataSaverTopHostBlacklistState[];
extern const char kTimeBlacklistLastInitialized[];
extern const char kHintsFetcherHostsSuccessfullyFetched[];
extern const char kPendingHintsProcessingVersion[];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment