Commit f1674b1c authored by Mustafa Emre Acer's avatar Mustafa Emre Acer Committed by Commit Bot

Lookalike URLs: Add a per-profile service to fetch engaged sites

We currently fetch the list of engaged sites on every navigation on the UI
thread. This is slow. We also do this separately for each tab.

This CL introduces a profile keyed service called LookalikeUrlService. This
service fetches the list of engaged sites every 5 minutes in a background thread
and stores the results until the next update. It also gets rid of the need to do
a fetch for each tab separately.

Bug: 913647
Change-Id: I9f7080c45834de576eb081243778f2f17c3e4ccd
Reviewed-on: https://chromium-review.googlesource.com/c/1389167
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Reviewed-by: default avatarDominick Ng <dominickn@chromium.org>
Reviewed-by: default avatarTommy Li <tommycli@chromium.org>
Reviewed-by: default avatarStefan Kuhne <skuhne@chromium.org>
Cr-Commit-Position: refs/heads/master@{#622111}
parent 520af7b3
......@@ -126,14 +126,6 @@ std::vector<mojom::SiteEngagementDetails> GetAllDetailsImpl(
return details;
}
// Takes a scoped_refptr to keep HostContentSettingsMap alive. See
// crbug.com/901287.
std::vector<mojom::SiteEngagementDetails> GetAllDetailsImplInBackground(
std::unique_ptr<base::Clock> clock,
scoped_refptr<HostContentSettingsMap> map) {
return GetAllDetailsImpl(clock.get(), map.get());
}
// Only accept a navigation event for engagement if it is one of:
// a. direct typed navigation
// b. clicking on an omnibox suggestion brought up by typing a keyword
......@@ -181,6 +173,15 @@ double SiteEngagementService::GetScoreFromSettings(
.GetTotalScore();
}
// static
std::vector<mojom::SiteEngagementDetails>
SiteEngagementService::GetAllDetailsInBackground(
base::Time now,
scoped_refptr<HostContentSettingsMap> map) {
StoppedClock clock(now);
return GetAllDetailsImpl(&clock, map.get());
}
SiteEngagementService::SiteEngagementService(Profile* profile)
: SiteEngagementService(profile, base::DefaultClock::GetInstance()) {
content::BrowserThread::PostAfterStartupTask(
......@@ -481,8 +482,7 @@ void SiteEngagementService::MaybeRecordMetrics() {
{base::TaskPriority::BEST_EFFORT,
base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN},
base::BindOnce(
&GetAllDetailsImplInBackground,
std::make_unique<StoppedClock>(clock_->Now()),
&GetAllDetailsInBackground, clock_->Now(),
base::WrapRefCounted(
HostContentSettingsMapFactory::GetForProfile(profile_))),
base::BindOnce(&SiteEngagementService::RecordMetrics,
......
......@@ -126,6 +126,13 @@ class SiteEngagementService : public KeyedService,
static double GetScoreFromSettings(HostContentSettingsMap* settings,
const GURL& origin);
// Retrieves all details. Can be called from a background thread. |now| must
// be the current timestamp. Takes a scoped_refptr to keep
// HostContentSettingsMap alive. See crbug.com/901287.
static std::vector<mojom::SiteEngagementDetails> GetAllDetailsInBackground(
base::Time now,
scoped_refptr<HostContentSettingsMap> map);
explicit SiteEngagementService(Profile* profile);
~SiteEngagementService() override;
......
......@@ -922,6 +922,8 @@ jumbo_split_static_library("ui") {
"omnibox/clipboard_utils.h",
"omnibox/lookalike_url_navigation_observer.cc",
"omnibox/lookalike_url_navigation_observer.h",
"omnibox/lookalike_url_service.cc",
"omnibox/lookalike_url_service.h",
"omnibox/omnibox_theme.cc",
"omnibox/omnibox_theme.h",
"page_action/page_action_icon_container.h",
......
......@@ -12,6 +12,7 @@
#include "chrome/browser/engagement/site_engagement_service.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/ui/omnibox/alternate_nav_infobar_delegate.h"
#include "chrome/browser/ui/omnibox/lookalike_url_service.h"
#include "chrome/common/chrome_features.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/ukm/content/source_url_recorder.h"
......@@ -62,7 +63,7 @@ std::string GetETLDPlusOne(const GURL& url) {
// |domain_and_registry| may be attempting to spoof, based on skeleton
// comparison.
std::string GetMatchingSiteEngagementDomain(
SiteEngagementService* service,
const std::vector<GURL>& engaged_sites,
const std::string& domain_and_registry) {
// Compute skeletons using eTLD+1.
// eTLD+1 can be empty for private domains.
......@@ -77,21 +78,16 @@ std::string GetMatchingSiteEngagementDomain(
std::map<std::string, url_formatter::Skeletons>
domain_and_registry_to_skeleton;
std::vector<mojom::SiteEngagementDetails> engagement_details =
service->GetAllDetails();
for (const auto& detail : engagement_details) {
// Ignore sites with an engagement score lower than LOW.
if (!service->IsEngagementAtLeast(detail.origin,
blink::mojom::EngagementLevel::MEDIUM))
continue;
// Site engagement service should only return HTTP or HTTPS domains.
DCHECK(detail.origin.SchemeIsHTTPOrHTTPS());
for (const GURL& engaged_site : engaged_sites) {
DCHECK(engaged_site.SchemeIsHTTPOrHTTPS());
// If the user has engaged with eTLD+1 of this site, don't show any
// lookalike navigation suggestions.
// lookalike navigation suggestions. This ignores the scheme. That's okay as
// it's the more conservative option: If the user is engaged with
// http://domain.test, not showing the warning on https://domain.test is
// acceptable.
const std::string engaged_domain_and_registry =
GetETLDPlusOne(detail.origin);
GetETLDPlusOne(engaged_site);
// eTLD+1 can be empty for private domains (e.g. http://test).
if (engaged_domain_and_registry.empty())
continue;
......@@ -119,7 +115,7 @@ std::string GetMatchingSiteEngagementDomain(
}
if (SkeletonsMatch(navigated_skeletons, skeletons))
return detail.origin.host();
return engaged_site.host();
}
return std::string();
}
......@@ -132,7 +128,9 @@ const char LookalikeUrlNavigationObserver::kHistogramName[] =
LookalikeUrlNavigationObserver::LookalikeUrlNavigationObserver(
content::WebContents* web_contents)
: WebContentsObserver(web_contents) {}
: WebContentsObserver(web_contents),
profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())),
weak_factory_(this) {}
LookalikeUrlNavigationObserver::~LookalikeUrlNavigationObserver() {}
......@@ -155,16 +153,23 @@ void LookalikeUrlNavigationObserver::DidFinishNavigation(
// If the user has engaged with this site, don't show any lookalike
// navigation suggestions.
Profile* profile =
Profile::FromBrowserContext(web_contents()->GetBrowserContext());
SiteEngagementService* service = SiteEngagementService::Get(profile);
if (service->IsEngagementAtLeast(url, blink::mojom::EngagementLevel::MEDIUM))
if (SiteEngagementService::Get(profile_)->IsEngagementAtLeast(
url, blink::mojom::EngagementLevel::MEDIUM))
return;
LookalikeUrlService::Get(profile_)->GetEngagedSites(
base::BindOnce(&LookalikeUrlNavigationObserver::PerformChecks,
weak_factory_.GetWeakPtr(), url));
}
void LookalikeUrlNavigationObserver::PerformChecks(
const GURL& url,
const std::vector<GURL>& engaged_sites) {
std::string matched_domain;
MatchType match_type;
if (!GetMatchingDomain(url, service, &matched_domain, &match_type))
if (!GetMatchingDomain(url, engaged_sites, &matched_domain, &match_type)) {
return;
}
DCHECK(!matched_domain.empty());
......@@ -191,7 +196,7 @@ void LookalikeUrlNavigationObserver::DidFinishNavigation(
bool LookalikeUrlNavigationObserver::GetMatchingDomain(
const GURL& url,
SiteEngagementService* service,
const std::vector<GURL>& engaged_sites,
std::string* matched_domain,
MatchType* match_type) {
// Perform all computations on eTLD+1.
......@@ -218,7 +223,7 @@ bool LookalikeUrlNavigationObserver::GetMatchingDomain(
}
const std::string matched_engaged_domain =
GetMatchingSiteEngagementDomain(service, domain_and_registry);
GetMatchingSiteEngagementDomain(engaged_sites, domain_and_registry);
if (!matched_engaged_domain.empty()) {
RecordEvent(NavigationSuggestionEvent::kMatchSiteEngagement);
*matched_domain = matched_engaged_domain;
......
......@@ -5,6 +5,11 @@
#ifndef CHROME_BROWSER_UI_OMNIBOX_LOOKALIKE_URL_NAVIGATION_OBSERVER_H_
#define CHROME_BROWSER_UI_OMNIBOX_LOOKALIKE_URL_NAVIGATION_OBSERVER_H_
#include <string>
#include <vector>
#include "base/memory/weak_ptr.h"
#include "chrome/browser/engagement/site_engagement_details.mojom.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/browser/web_contents_user_data.h"
......@@ -12,7 +17,7 @@ namespace content {
class NavigationHandle;
}
class SiteEngagementService;
class Profile;
// Observes navigations and shows an infobar if the navigated domain name
// is visually similar to a top domain or a domain with a site engagement score.
......@@ -62,13 +67,17 @@ class LookalikeUrlNavigationObserver
FRIEND_TEST_ALL_PREFIXES(LookalikeUrlNavigationObserverTest,
IsEditDistanceAtMostOne);
// Performs top domain and engaged site checks on the navigated |url|. Uses
// |engaged_sites| for the engaged site checks.
void PerformChecks(const GURL& url, const std::vector<GURL>& engaged_sites);
// Returns true if a domain is visually similar to the hostname of |url|. The
// matching domain can be a top domain or an engaged site. Similarity check
// is made using both visual skeleton and edit distance comparison. If this
// returns true, match details will be written into |matched_domain| and
// |match_type|. They cannot be nullptr.
bool GetMatchingDomain(const GURL& url,
SiteEngagementService* service,
const std::vector<GURL>& engaged_sites,
std::string* matched_domain,
MatchType* match_type);
......@@ -83,6 +92,9 @@ class LookalikeUrlNavigationObserver
static std::string GetSimilarDomainFromTop500(
const std::string& domain_and_registry);
Profile* profile_;
base::WeakPtrFactory<LookalikeUrlNavigationObserver> weak_factory_;
WEB_CONTENTS_USER_DATA_KEY_DECL();
};
......
......@@ -3,6 +3,7 @@
// found in the LICENSE file.
#include "base/test/metrics/histogram_tester.h"
#include "base/test/simple_test_clock.h"
#include "chrome/browser/engagement/site_engagement_score.h"
#include "chrome/browser/engagement/site_engagement_service.h"
#include "chrome/browser/history/history_service_factory.h"
......@@ -13,6 +14,7 @@
#include "chrome/browser/ui/browser_commands.h"
#include "chrome/browser/ui/omnibox/alternate_nav_infobar_delegate.h"
#include "chrome/browser/ui/omnibox/lookalike_url_navigation_observer.h"
#include "chrome/browser/ui/omnibox/lookalike_url_service.h"
#include "chrome/browser/ui/tabs/tab_strip_model.h"
#include "chrome/common/chrome_features.h"
#include "chrome/test/base/in_process_browser_test.h"
......@@ -81,6 +83,14 @@ class LookalikeUrlNavigationObserverBrowserTest
host_resolver()->AddRule("*", "127.0.0.1");
ASSERT_TRUE(embedded_test_server()->Start());
test_ukm_recorder_ = std::make_unique<ukm::TestAutoSetUkmRecorder>();
const base::Time kNow = base::Time::FromDoubleT(1000);
test_clock_.SetNow(kNow);
LookalikeUrlService* lookalike_service =
LookalikeUrlService::Get(browser()->profile());
lookalike_service->SetClockForTesting(&test_clock_);
lookalike_service->ClearEngagedSitesForTesting();
}
// Sets the absolute Site Engagement |score| for the testing origin.
......@@ -126,11 +136,21 @@ class LookalikeUrlNavigationObserverBrowserTest
browser()->tab_strip_model()->GetActiveWebContents();
InfoBarService* infobar_service =
InfoBarService::FromWebContents(web_contents);
content::TestNavigationObserver navigation_observer(web_contents, 1);
NavigateToURL(navigated_url);
navigation_observer.Wait();
EXPECT_EQ(0u, infobar_service->infobar_count());
{
content::TestNavigationObserver navigation_observer(web_contents, 1);
NavigateToURL(navigated_url);
navigation_observer.Wait();
EXPECT_EQ(0u, infobar_service->infobar_count());
}
{
// Navigate to an empty page. This will happen after any
// LookalikeUrlService tasks, so will effectively wait for those tasks to
// finish.
content::TestNavigationObserver navigation_observer(web_contents, 1);
NavigateToURL(GURL("about:blank"));
navigation_observer.Wait();
EXPECT_EQ(0u, infobar_service->infobar_count());
}
}
void TestInfobarShown(const GURL& navigated_url,
......@@ -177,9 +197,12 @@ class LookalikeUrlNavigationObserverBrowserTest
ukm::TestUkmRecorder* test_ukm_recorder() { return test_ukm_recorder_.get(); }
base::SimpleTestClock* test_clock() { return &test_clock_; }
private:
base::test::ScopedFeatureList feature_list_;
std::unique_ptr<ukm::TestAutoSetUkmRecorder> test_ukm_recorder_;
base::SimpleTestClock test_clock_;
};
INSTANTIATE_TEST_CASE_P(,
......@@ -371,6 +394,9 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationObserverBrowserTest,
// Even if the navigated site has a low engagement score, it should be
// considered for lookalike suggestions.
SetSiteEngagementScore(kNavigatedUrl, kLowEngagement);
// Advance the clock to force LookalikeUrlService to fetch a new engaged
// site list.
test_clock()->Advance(base::TimeDelta::FromHours(1));
if (GetParam() == FeatureTestState::kEnabled) {
// If the feature is enabled, the UI will be displayed. Expect extra
......@@ -406,9 +432,9 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationObserverBrowserTest,
}
ukm_urls.push_back(kNavigatedUrl);
CheckUkm(ukm_urls,
LookalikeUrlNavigationObserver::MatchType::kSiteEngagement);
}
CheckUkm(ukm_urls,
LookalikeUrlNavigationObserver::MatchType::kSiteEngagement);
}
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationObserverBrowserTest,
......
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/ui/omnibox/lookalike_url_service.h"
#include <utility>
#include "base/bind.h"
#include "base/callback.h"
#include "base/macros.h"
#include "base/memory/scoped_refptr.h"
#include "base/memory/singleton.h"
#include "base/task/post_task.h"
#include "base/time/default_clock.h"
#include "chrome/browser/content_settings/host_content_settings_map_factory.h"
#include "chrome/browser/engagement/site_engagement_service.h"
#include "chrome/browser/engagement/site_engagement_service_factory.h"
#include "chrome/browser/profiles/incognito_helpers.h"
#include "chrome/browser/profiles/profile.h"
#include "components/content_settings/core/browser/host_content_settings_map.h"
#include "components/keyed_service/content/browser_context_dependency_manager.h"
#include "components/keyed_service/content/browser_context_keyed_service_factory.h"
namespace {
constexpr uint32_t kEngagedSiteUpdateIntervalInSeconds = 5 * 60;
#include "components/keyed_service/content/browser_context_keyed_service_factory.h"
class LookalikeUrlServiceFactory : public BrowserContextKeyedServiceFactory {
public:
static LookalikeUrlService* GetForProfile(Profile* profile) {
return static_cast<LookalikeUrlService*>(
GetInstance()->GetServiceForBrowserContext(profile,
/*create_service=*/true));
}
static LookalikeUrlServiceFactory* GetInstance() {
return base::Singleton<LookalikeUrlServiceFactory>::get();
}
private:
friend struct base::DefaultSingletonTraits<LookalikeUrlServiceFactory>;
// LookalikeUrlServiceFactory();
LookalikeUrlServiceFactory()
: BrowserContextKeyedServiceFactory(
"LookalikeUrlServiceFactory",
BrowserContextDependencyManager::GetInstance()) {
DependsOn(SiteEngagementServiceFactory::GetInstance());
}
~LookalikeUrlServiceFactory() override {}
// BrowserContextKeyedServiceFactory:
KeyedService* BuildServiceInstanceFor(
content::BrowserContext* profile) const override {
return new LookalikeUrlService(static_cast<Profile*>(profile));
}
content::BrowserContext* GetBrowserContextToUse(
content::BrowserContext* context) const override {
return chrome::GetBrowserContextOwnInstanceInIncognito(context);
}
DISALLOW_COPY_AND_ASSIGN(LookalikeUrlServiceFactory);
};
} // namespace
LookalikeUrlService::LookalikeUrlService(Profile* profile)
: profile_(profile),
clock_(base::DefaultClock::GetInstance()),
weak_factory_(this) {}
LookalikeUrlService::~LookalikeUrlService() {}
// static
LookalikeUrlService* LookalikeUrlService::Get(Profile* profile) {
return LookalikeUrlServiceFactory::GetForProfile(profile);
}
void LookalikeUrlService::GetEngagedSites(EngagedSitesCallback callback) {
const base::Time now = clock_->Now();
if (!last_engagement_fetch_time_.is_null()) {
const base::TimeDelta elapsed = now - last_engagement_fetch_time_;
if (elapsed <
base::TimeDelta::FromSeconds(kEngagedSiteUpdateIntervalInSeconds)) {
std::move(callback).Run(engaged_sites_);
return;
}
}
last_engagement_fetch_time_ = now;
base::PostTaskWithTraitsAndReplyWithResult(
FROM_HERE,
{base::TaskPriority::BEST_EFFORT,
base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN},
base::BindOnce(
&SiteEngagementService::GetAllDetailsInBackground, now,
base::WrapRefCounted(
HostContentSettingsMapFactory::GetForProfile(profile_))),
base::BindOnce(&LookalikeUrlService::OnFetchEngagedSites,
weak_factory_.GetWeakPtr(), std::move(callback)));
}
void LookalikeUrlService::SetClockForTesting(base::Clock* clock) {
clock_ = clock;
}
void LookalikeUrlService::ClearEngagedSitesForTesting() {
engaged_sites_.clear();
last_engagement_fetch_time_ = clock_->Now();
}
void LookalikeUrlService::OnFetchEngagedSites(
EngagedSitesCallback callback,
std::vector<mojom::SiteEngagementDetails> details) {
SiteEngagementService* service = SiteEngagementService::Get(profile_);
engaged_sites_.clear();
for (const mojom::SiteEngagementDetails& detail : details) {
// Ignore sites with an engagement score below threshold.
if (!service->IsEngagementAtLeast(detail.origin,
blink::mojom::EngagementLevel::MEDIUM)) {
continue;
}
engaged_sites_.push_back(detail.origin);
}
std::move(callback).Run(engaged_sites_);
}
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_UI_OMNIBOX_LOOKALIKE_URL_SERVICE_H_
#define CHROME_BROWSER_UI_OMNIBOX_LOOKALIKE_URL_SERVICE_H_
#include <vector>
#include "base/callback_forward.h"
#include "base/macros.h"
#include "base/memory/weak_ptr.h"
#include "base/time/time.h"
#include "chrome/browser/engagement/site_engagement_details.mojom.h"
#include "components/keyed_service/core/keyed_service.h"
#include "url/gurl.h"
class Profile;
namespace base {
class Clock;
}
// A service that handles operations on lookalike URLs. It can fetch the list of
// engaged sites in a background thread and cache the results until the next
// update. This is more efficient than fetching the list on each navigation for
// each tab separately.
class LookalikeUrlService : public KeyedService {
public:
explicit LookalikeUrlService(Profile* profile);
~LookalikeUrlService() override;
using EngagedSitesCallback =
base::OnceCallback<void(const std::vector<GURL>&)>;
static LookalikeUrlService* Get(Profile* profile);
// Triggers an update to engaged site list and passes the most recent result
// |callback|. The list is updated only after a certain amount of time passes
// after the last update. As a result, calling this method may or may not
// change the contents of engaged_sites, depending on the timing.
void GetEngagedSites(EngagedSitesCallback callback);
void SetClockForTesting(base::Clock* clock);
void ClearEngagedSitesForTesting();
private:
void OnFetchEngagedSites(EngagedSitesCallback callback,
std::vector<mojom::SiteEngagementDetails> details);
Profile* profile_;
base::Clock* clock_;
base::Time last_engagement_fetch_time_;
std::vector<GURL> engaged_sites_;
base::WeakPtrFactory<LookalikeUrlService> weak_factory_;
DISALLOW_COPY_AND_ASSIGN(LookalikeUrlService);
};
#endif // CHROME_BROWSER_UI_OMNIBOX_LOOKALIKE_URL_SERVICE_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment