Commit 5630fd7d authored by Mustafa Emre Acer, committed by Commit Bot

Trigger IDN navigation suggestion UI for sites in site engagement list

This CL expands the navigation suggestion feature by looking for potential suggestions in the engaged sites list using the site engagement service.

The match is still done by computing ICU skeletons of the hostnames. The skeletons are calculated from the eTLD+1 portions of the navigated and engaged sites, so that a navigation to a subdomain of a site that looks similar to an engaged site still triggers the UI.

Design: https://docs.google.com/document/d/1gQCTenYuRnP77nMOo8RpS3SUxCiVewxQ9uQ3fuSefdY/edit?usp=sharing
Bug: 847662
Change-Id: I708b8f2503b16da8241145afe2f4aa0d2c001b9c
Reviewed-on: https://chromium-review.googlesource.com/1162919
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Reviewed-by: Mustafa Emre Acer <meacer@chromium.org>
Reviewed-by: Peter Kasting <pkasting@chromium.org>
Cr-Commit-Position: refs/heads/master@{#582218}
parent 79c70d39
......@@ -6,13 +6,17 @@
#include "base/bind.h"
#include "base/metrics/histogram_macros.h"
#include "base/stl_util.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/browser/engagement/site_engagement_service.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/ui/omnibox/alternate_nav_infobar_delegate.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/url_formatter/idn_spoof_checker.h"
#include "components/url_formatter/url_formatter.h"
#include "content/public/browser/navigation_details.h"
#include "content/public/browser/navigation_entry.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
namespace {
......@@ -20,6 +24,17 @@ void RecordEvent(IdnNavigationObserver::NavigationSuggestionEvent event) {
UMA_HISTOGRAM_ENUMERATION(IdnNavigationObserver::kHistogramName, event);
}
// Returns true if at least one skeleton string appears in both |skeletons1|
// and |skeletons2|. Both sets are expected to be non-empty.
bool SkeletonsMatch(const url_formatter::Skeletons& skeletons1,
                    const url_formatter::Skeletons& skeletons2) {
  DCHECK(!skeletons1.empty());
  DCHECK(!skeletons2.empty());
  bool has_common_skeleton = false;
  // Stop scanning as soon as a shared skeleton is found.
  for (auto candidate = skeletons1.begin();
       candidate != skeletons1.end() && !has_common_skeleton; ++candidate) {
    has_common_skeleton = base::ContainsKey(skeletons2, *candidate);
  }
  return has_common_skeleton;
}
} // namespace
// static
......@@ -35,22 +50,34 @@ void IdnNavigationObserver::NavigationEntryCommitted(
const content::LoadCommittedDetails& load_details) {
const GURL url = load_details.entry->GetVirtualURL();
const base::StringPiece host = url.host_piece();
std::string matched_domain;
url_formatter::IDNConversionResult result =
url_formatter::IDNToUnicodeWithDetails(host);
if (!result.has_idn_component || result.matching_top_domain.empty())
if (!result.has_idn_component)
return;
std::string matched_domain;
if (result.matching_top_domain.empty()) {
matched_domain = GetMatchingSiteEngagementDomain(url);
if (matched_domain.empty())
return;
RecordEvent(NavigationSuggestionEvent::kMatchSiteEngagement);
} else {
matched_domain = result.matching_top_domain;
RecordEvent(NavigationSuggestionEvent::kMatchTopSite);
}
DCHECK(!matched_domain.empty());
GURL::Replacements replace_host;
replace_host.SetHostStr(result.matching_top_domain);
replace_host.SetHostStr(matched_domain);
const GURL suggested_url = url.ReplaceComponents(replace_host);
RecordEvent(NavigationSuggestionEvent::kInfobarShown);
AlternateNavInfoBarDelegate::CreateForIDNNavigation(
web_contents(), base::UTF8ToUTF16(result.matching_top_domain),
suggested_url, load_details.entry->GetVirtualURL(),
web_contents(), base::UTF8ToUTF16(matched_domain), suggested_url,
load_details.entry->GetVirtualURL(),
base::BindOnce(RecordEvent, NavigationSuggestionEvent::kLinkClicked));
}
......@@ -63,3 +90,65 @@ void IdnNavigationObserver::CreateForWebContents(
UserDataKey(), std::make_unique<IdnNavigationObserver>(web_contents));
}
}
std::string IdnNavigationObserver::GetMatchingSiteEngagementDomain(
const GURL& url) {
Profile* profile =
Profile::FromBrowserContext(web_contents()->GetBrowserContext());
SiteEngagementService* service = SiteEngagementService::Get(profile);
if (service->IsEngagementAtLeast(url, blink::mojom::EngagementLevel::LOW))
return std::string();
// Compute skeletons using eTLD+1.
const std::string domain_and_registry =
net::registry_controlled_domains::GetDomainAndRegistry(
url, net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
url_formatter::IDNConversionResult result =
url_formatter::IDNToUnicodeWithDetails(domain_and_registry);
DCHECK(result.has_idn_component);
const url_formatter::Skeletons navigated_skeletons =
url_formatter::GetSkeletons(result.result);
std::map<std::string, url_formatter::Skeletons>
domain_and_registry_to_skeleton;
std::vector<mojom::SiteEngagementDetails> engagement_details =
service->GetAllDetails();
for (const auto& detail : engagement_details) {
// Ignore sites with an engagement score lower than LOW.
if (!service->IsEngagementAtLeast(detail.origin,
blink::mojom::EngagementLevel::LOW))
continue;
// If this is already an engaged site, don't suggest any alternatives.
const std::string engaged_domain_and_registry =
net::registry_controlled_domains::GetDomainAndRegistry(
detail.origin,
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
if (domain_and_registry == engaged_domain_and_registry)
return std::string();
// Multiple domains can map to the same eTLD+1, avoid skeleton generation
// when possible.
auto it = domain_and_registry_to_skeleton.find(engaged_domain_and_registry);
url_formatter::Skeletons skeletons;
if (it == domain_and_registry_to_skeleton.end()) {
// Engaged site can be IDN. Decode as unicode and compute the skeleton
// from that. At this point, top domain checks have already been done, so
// if the site is IDN, it'll always be decoded as unicode (i.e. IDN spoof
// checker will not find a matching top domain and fall back to punycode
// for it).
url_formatter::IDNConversionResult conversion_result =
url_formatter::IDNToUnicodeWithDetails(engaged_domain_and_registry);
skeletons = url_formatter::GetSkeletons(conversion_result.result);
domain_and_registry_to_skeleton[engaged_domain_and_registry] = skeletons;
} else {
skeletons = it->second;
}
if (SkeletonsMatch(navigated_skeletons, skeletons))
return detail.origin.host();
}
return std::string();
}
......@@ -19,10 +19,12 @@ class IdnNavigationObserver
kNone = 0,
kInfobarShown = 1,
kLinkClicked = 2,
kMatchTopSite = 3,
kMatchSiteEngagement = 4,
// Append new items to the end of the list above; do not modify or
// replace existing values. Comment out obsolete items.
kMaxValue = kLinkClicked,
kMaxValue = kMatchSiteEngagement,
};
static const char kHistogramName[];
......@@ -35,6 +37,11 @@ class IdnNavigationObserver
// content::WebContentsObserver:
void NavigationEntryCommitted(
const content::LoadCommittedDetails& load_details) override;
private:
// Returns a site that the user has used before that |url| may be attempting
// to spoof, based on skeleton comparison.
std::string GetMatchingSiteEngagementDomain(const GURL& url);
};
#endif // CHROME_BROWSER_UI_OMNIBOX_IDN_NAVIGATION_OBSERVER_H_
......@@ -3,6 +3,8 @@
// found in the LICENSE file.
#include "base/test/metrics/histogram_tester.h"
#include "chrome/browser/engagement/site_engagement_score.h"
#include "chrome/browser/engagement/site_engagement_service.h"
#include "chrome/browser/history/history_service_factory.h"
#include "chrome/browser/history/history_test_utils.h"
#include "chrome/browser/infobars/infobar_observer.h"
......@@ -27,7 +29,7 @@ class IdnNavigationObserverBrowserTest
public testing::WithParamInterface<bool> {
protected:
void SetUp() override {
if (GetParam())
if (IsFeatureEnabled())
feature_list_.InitAndEnableFeature(features::kIdnNavigationSuggestions);
InProcessBrowserTest::SetUp();
}
......@@ -37,6 +39,57 @@ class IdnNavigationObserverBrowserTest
ASSERT_TRUE(embedded_test_server()->Start());
}
// Sets the absolute Site Engagement |score| for the testing origin.
void SetSiteEngagementScore(const GURL& url, double score) {
SiteEngagementService::Get(browser()->profile())
->ResetBaseScoreForURL(url, score);
}
void TestInfobarShown(const GURL& navigated_url,
const GURL& expected_suggested_url) {
// Sanity check navigated_url.
url_formatter::IDNConversionResult result =
url_formatter::IDNToUnicodeWithDetails(navigated_url.host_piece());
ASSERT_TRUE(result.has_idn_component);
history::HistoryService* const history_service =
HistoryServiceFactory::GetForProfile(
browser()->profile(), ServiceAccessType::EXPLICIT_ACCESS);
ui_test_utils::WaitForHistoryToLoad(history_service);
content::WebContents* web_contents =
browser()->tab_strip_model()->GetActiveWebContents();
InfoBarService* infobar_service =
InfoBarService::FromWebContents(web_contents);
InfoBarObserver infobar_added_observer(
infobar_service, InfoBarObserver::Type::kInfoBarAdded);
ui_test_utils::NavigateToURL(browser(), navigated_url);
infobar_added_observer.Wait();
infobars::InfoBar* infobar = infobar_service->infobar_at(0);
EXPECT_EQ(infobars::InfoBarDelegate::ALTERNATE_NAV_INFOBAR_DELEGATE,
infobar->delegate()->GetIdentifier());
// Clicking the link in the infobar should remove the infobar and navigate
// to the suggested URL.
InfoBarObserver infobar_removed_observer(
infobar_service, InfoBarObserver::Type::kInfoBarRemoved);
AlternateNavInfoBarDelegate* infobar_delegate =
static_cast<AlternateNavInfoBarDelegate*>(infobar->delegate());
infobar_delegate->LinkClicked(WindowOpenDisposition::CURRENT_TAB);
infobar_removed_observer.Wait();
EXPECT_EQ(0u, infobar_service->infobar_count());
EXPECT_EQ(expected_suggested_url, web_contents->GetURL());
// Clicking the link in the infobar should also remove the original URL from
// history.
ui_test_utils::HistoryEnumerator enumerator(browser()->profile());
EXPECT_FALSE(base::ContainsValue(enumerator.urls(), navigated_url));
}
// Returns the boolean test parameter, which says whether this run has the
// feature under test enabled.
bool IsFeatureEnabled() const {
  return GetParam();
}
private:
base::test::ScopedFeatureList feature_list_;
};
......@@ -76,58 +129,76 @@ IN_PROC_BROWSER_TEST_P(IdnNavigationObserverBrowserTest,
// Navigating to a domain whose visual representation looks like a top domain
// should show a "Did you mean to go to ..." infobar.
IN_PROC_BROWSER_TEST_P(IdnNavigationObserverBrowserTest, TopDomainIdn_Infobar) {
if (!GetParam())
if (!IsFeatureEnabled())
return;
base::HistogramTester histograms;
history::HistoryService* const history_service =
HistoryServiceFactory::GetForProfile(browser()->profile(),
ServiceAccessType::EXPLICIT_ACCESS);
ui_test_utils::WaitForHistoryToLoad(history_service);
const GURL kIdnUrl =
embedded_test_server()->GetURL("googlé.com", "/title1.html");
const GURL kSuggestedUrl =
embedded_test_server()->GetURL("google.com", "/title1.html");
content::WebContents* web_contents =
browser()->tab_strip_model()->GetActiveWebContents();
InfoBarService* infobar_service =
InfoBarService::FromWebContents(web_contents);
InfoBarObserver infobar_added_observer(infobar_service,
InfoBarObserver::Type::kInfoBarAdded);
ui_test_utils::NavigateToURL(browser(), kIdnUrl);
infobar_added_observer.Wait();
infobars::InfoBar* infobar = infobar_service->infobar_at(0);
EXPECT_EQ(infobars::InfoBarDelegate::ALTERNATE_NAV_INFOBAR_DELEGATE,
infobar->delegate()->GetIdentifier());
// Clicking the link in the infobar should remove the infobar and navigate to
// the suggested URL.
InfoBarObserver infobar_removed_observer(
infobar_service, InfoBarObserver::Type::kInfoBarRemoved);
AlternateNavInfoBarDelegate* infobar_delegate =
static_cast<AlternateNavInfoBarDelegate*>(infobar->delegate());
infobar_delegate->LinkClicked(WindowOpenDisposition::CURRENT_TAB);
infobar_removed_observer.Wait();
EXPECT_EQ(0u, infobar_service->infobar_count());
EXPECT_EQ(kSuggestedUrl, web_contents->GetURL());
// Clicking the link in the infobar should also remove the original URL from
// history.
ui_test_utils::HistoryEnumerator enumerator(browser()->profile());
EXPECT_FALSE(base::ContainsValue(enumerator.urls(), kIdnUrl));
TestInfobarShown(embedded_test_server()->GetURL(
"googlé.com", "/title1.html") /* navigated */,
embedded_test_server()->GetURL(
"google.com", "/title1.html") /* suggested */);
histograms.ExpectTotalCount(IdnNavigationObserver::kHistogramName, 2);
histograms.ExpectTotalCount(IdnNavigationObserver::kHistogramName, 3);
histograms.ExpectBucketCount(
IdnNavigationObserver::kHistogramName,
IdnNavigationObserver::NavigationSuggestionEvent::kInfobarShown, 1);
histograms.ExpectBucketCount(
IdnNavigationObserver::kHistogramName,
IdnNavigationObserver::NavigationSuggestionEvent::kLinkClicked, 1);
histograms.ExpectBucketCount(
IdnNavigationObserver::kHistogramName,
IdnNavigationObserver::NavigationSuggestionEvent::kMatchTopSite, 1);
}
// When the navigated domain visually resembles a site that was given a site
// engagement score by this test, a "Did you mean to go to ..." infobar should
// be shown that points at the engaged site.
IN_PROC_BROWSER_TEST_P(IdnNavigationObserverBrowserTest,
                       SiteEngagement_Infobar) {
  if (!IsFeatureEnabled())
    return;

  using Event = IdnNavigationObserver::NavigationSuggestionEvent;

  // Seed the engaged sites: ASCII and IDN hosts, with and without a "www"
  // subdomain.
  const char* const kEngagedUrls[] = {
      "http://site1.test",
      "http://www.site2.test",
      "http://sité3.test",
      "http://www.sité4.test",
  };
  for (const char* engaged_url : kEngagedUrls)
    SetSiteEngagementScore(GURL(engaged_url), 20);

  const struct {
    const char* navigated_host;
    const char* suggested_host;
  } kCases[] = {
      {"sité1.test", "site1.test"},
      {"mail.www.sité1.test", "site1.test"},
      // The comparison is done on eTLD+1s, so these are expected to match
      // even though the subdomains differ.
      {"sité2.test", "www.site2.test"},
      {"mail.sité2.test", "www.site2.test"},
      {"síté3.test", "sité3.test"},
      {"mail.síté3.test", "sité3.test"},
      {"síté4.test", "www.sité4.test"},
      {"mail.síté4.test", "www.sité4.test"},
  };

  for (const auto& current : kCases) {
    // A fresh tester per case keeps the expected counts independent.
    base::HistogramTester histogram_tester;
    const GURL navigated_url =
        embedded_test_server()->GetURL(current.navigated_host, "/title1.html");
    const GURL suggested_url =
        embedded_test_server()->GetURL(current.suggested_host, "/title1.html");
    TestInfobarShown(navigated_url, suggested_url);

    // Exactly three samples are expected: match source, infobar shown and
    // link clicked.
    histogram_tester.ExpectTotalCount(IdnNavigationObserver::kHistogramName,
                                      3);
    histogram_tester.ExpectBucketCount(IdnNavigationObserver::kHistogramName,
                                       Event::kInfobarShown, 1);
    histogram_tester.ExpectBucketCount(IdnNavigationObserver::kHistogramName,
                                       Event::kLinkClicked, 1);
    histogram_tester.ExpectBucketCount(IdnNavigationObserver::kHistogramName,
                                       Event::kMatchSiteEngagement, 1);
  }
}
// The infobar shouldn't be shown when the feature is disabled.
......
......@@ -86,14 +86,13 @@ IDNSpoofChecker::HuffmanTrieParams g_trie_params{
kTopDomainsHuffmanTree, sizeof(kTopDomainsHuffmanTree), kTopDomainsTrie,
kTopDomainsTrieBits, kTopDomainsRootPosition};
std::string LookupMatchInTopDomains(const icu::UnicodeString& ustr_skeleton) {
std::string LookupMatchInTopDomains(const std::string& skeleton) {
DCHECK(!skeleton.empty());
DCHECK_NE(skeleton.back(), '.');
TopDomainPreloadDecoder preload_decoder(
g_trie_params.huffman_tree, g_trie_params.huffman_tree_size,
g_trie_params.trie, g_trie_params.trie_bits,
g_trie_params.trie_root_position);
std::string skeleton;
ustr_skeleton.toUTF8String(skeleton);
DCHECK_NE(skeleton.back(), '.');
auto labels = base::SplitStringPiece(skeleton, ".", base::KEEP_WHITESPACE,
base::SPLIT_WANT_ALL);
......@@ -366,6 +365,17 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label,
}
// Looks up each skeleton of |hostname| in the top domains list and returns the
// first non-empty match, or an empty string when none of them matches.
std::string IDNSpoofChecker::GetSimilarTopDomain(base::StringPiece16 hostname) {
  const Skeletons hostname_skeletons = GetSkeletons(hostname);
  for (const std::string& candidate : hostname_skeletons) {
    DCHECK(!candidate.empty());
    std::string top_domain = LookupMatchInTopDomains(candidate);
    if (top_domain.empty())
      continue;
    return top_domain;
  }
  return std::string();
}
Skeletons IDNSpoofChecker::GetSkeletons(base::StringPiece16 hostname) {
Skeletons skeletons;
size_t hostname_length = hostname.length() - (hostname.back() == '.' ? 1 : 0);
icu::UnicodeString host(FALSE, hostname.data(), hostname_length);
// If input has any characters outside Latin-Greek-Cyrillic and [0-9._-],
......@@ -376,7 +386,7 @@ std::string IDNSpoofChecker::GetSimilarTopDomain(base::StringPiece16 hostname) {
extra_confusable_mapper_->transliterate(host);
UErrorCode status = U_ZERO_ERROR;
icu::UnicodeString skeleton;
icu::UnicodeString ustr_skeleton;
// Map U+04CF (ӏ) to lowercase L in addition to what uspoof_getSkeleton does
// (mapping it to lowercase I).
......@@ -385,21 +395,27 @@ std::string IDNSpoofChecker::GetSimilarTopDomain(base::StringPiece16 hostname) {
icu::UnicodeString host_alt(host);
size_t length = host_alt.length();
char16_t* buffer = host_alt.getBuffer(-1);
for (char16_t* uc = buffer + u04cf_pos ; uc < buffer + length; ++uc) {
for (char16_t* uc = buffer + u04cf_pos; uc < buffer + length; ++uc) {
if (*uc == 0x4CF)
*uc = 0x6C; // Lowercase L
}
host_alt.releaseBuffer(length);
uspoof_getSkeletonUnicodeString(checker_, 0, host_alt, skeleton, &status);
uspoof_getSkeletonUnicodeString(checker_, 0, host_alt, ustr_skeleton,
&status);
if (U_SUCCESS(status)) {
std::string match = LookupMatchInTopDomains(skeleton);
if (!match.empty())
return match;
std::string skeleton;
ustr_skeleton.toUTF8String(skeleton);
skeletons.insert(skeleton);
}
}
uspoof_getSkeletonUnicodeString(checker_, 0, host, skeleton, &status);
return U_SUCCESS(status) ? LookupMatchInTopDomains(skeleton) : std::string();
uspoof_getSkeletonUnicodeString(checker_, 0, host, ustr_skeleton, &status);
if (U_SUCCESS(status)) {
std::string skeleton;
ustr_skeleton.toUTF8String(skeleton);
skeletons.insert(skeleton);
}
return skeletons;
}
bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic(
......
......@@ -8,6 +8,7 @@
#include <memory>
#include <string>
#include "base/containers/flat_set.h"
#include "base/gtest_prod_util.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece_forward.h"
......@@ -30,6 +31,8 @@ struct USpoofChecker;
namespace url_formatter {
FORWARD_DECLARE_TEST(UrlFormatterTest, IDNToUnicode);
using Skeletons = base::flat_set<std::string>;
// A helper class for IDN Spoof checking, used to ensure that no IDN input is
// spoofable per Chromium's standard of spoofability. For a more thorough
// explanation of how spoof checking works in Chromium, see
......@@ -64,6 +67,10 @@ class IDNSpoofChecker {
// top domains. Note that non-IDN hostnames will not get here.
std::string GetSimilarTopDomain(base::StringPiece16 hostname);
// Returns skeleton strings computed from |hostname|. This function can apply
// extra mappings to some characters to produce multiple skeletons.
Skeletons GetSkeletons(base::StringPiece16 hostname);
private:
// Sets allowed characters in IDN labels and turns on USPOOF_CHAR_LIMIT.
void SetAllowedUnicodeSet(UErrorCode* status);
......
......@@ -700,4 +700,8 @@ base::string16 StripWWWFromHost(const GURL& url) {
return StripWWW(base::ASCIIToUTF16(url.host_piece()));
}
// Forwards to the process-wide IDN spoof checker to compute the skeleton
// strings of |host|.
Skeletons GetSkeletons(const base::string16& host) {
  auto&& spoof_checker = g_idn_spoof_checker.Get();
  return spoof_checker.GetSkeletons(host);
}
} // namespace url_formatter
......@@ -21,6 +21,7 @@
#include <string>
#include <vector>
#include "base/containers/flat_set.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_offset_string_conversions.h"
......@@ -34,6 +35,8 @@ struct Parsed;
namespace url_formatter {
using Skeletons = base::flat_set<std::string>;
// Used by FormatUrl to specify handling of certain parts of the url.
typedef uint32_t FormatUrlType;
typedef uint32_t FormatUrlTypes;
......@@ -179,6 +182,9 @@ base::string16 StripWWW(const base::string16& text);
// Runs |url|'s host through StripWWW(). |url| must be valid.
base::string16 StripWWWFromHost(const GURL& url);
// Returns skeleton strings computed from |host| for spoof checking.
Skeletons GetSkeletons(const base::string16& host);
} // namespace url_formatter
#endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
......@@ -32362,6 +32362,9 @@ Called by update_use_counter_css.py.-->
<int value="0" label="None"/>
<int value="1" label="Infobar shown"/>
<int value="2" label="Link clicked on the infobar"/>
<int value="3" label="A navigation suggestion is found using top sites list"/>
<int value="4"
label="A navigation suggestion is found using site engagement"/>
</enum>
<enum name="NavigationWasServedFromCache">
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment