Commit 13a2718a authored by Mustafa Emre Acer's avatar Mustafa Emre Acer Committed by Commit Bot

Lookalike URLs: Allow safe redirects (aka defensive registrations)

Some lookalike sites are configured to redirect to their canonical counterparts.
The most common example is an IDN redirecting to the ASCII domain (e.g.
elespañol[.]com to elespanol[.]com). Currently, such redirects are blocked with
an interstitial, even though the navigation is harmless.

This CL treats a subset of such redirects as safe and allows them to go proceed
without an interstitial. In order to be deemed safe, the initial URL must
redirect to the same eTLD+1 of the URL it's visually similar to. It should also
redirect to the root of the domain without any path components. This should
cut down the number of extraneous interstitials we currently show for defensive
registrations.

Bug: 986404
Change-Id: Ie9f148d44407052c0b638fff9e9c514edf5cdef0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1713596
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Reviewed-by: default avatarMustafa Emre Acer <meacer@chromium.org>
Reviewed-by: default avatarJoe DeBlasio <jdeblasio@chromium.org>
Cr-Commit-Position: refs/heads/master@{#681940}
parent 94d27223
......@@ -204,6 +204,16 @@ bool IsEditDistanceAtMostOne(const base::string16& str1,
return edit_count <= 1;
}
bool IsSafeRedirect(const std::string& matching_domain,
const std::vector<GURL>& redirect_chain) {
if (redirect_chain.size() != 2) {
return false;
}
const GURL redirect_target = redirect_chain[redirect_chain.size() - 1];
return matching_domain == GetETLDPlusOne(redirect_target.host()) &&
redirect_target == redirect_target.GetWithEmptyPath();
}
LookalikeUrlNavigationThrottle::LookalikeUrlNavigationThrottle(
content::NavigationHandle* navigation_handle)
: content::NavigationThrottle(navigation_handle),
......@@ -215,7 +225,8 @@ LookalikeUrlNavigationThrottle::LookalikeUrlNavigationThrottle(
LookalikeUrlNavigationThrottle::~LookalikeUrlNavigationThrottle() {}
ThrottleCheckResult LookalikeUrlNavigationThrottle::HandleThrottleRequest(
const GURL& url) {
const GURL& url,
bool check_safe_redirect) {
// Ignore if running unit tests. Some tests use
// TestMockTimeTaskRunner::ScopedContext and call CreateTestWebContents()
// which navigates and waits for throttles to complete using a RunLoop.
......@@ -253,7 +264,8 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::HandleThrottleRequest(
if (service->EngagedSitesNeedUpdating()) {
service->ForceUpdateEngagedSites(
base::BindOnce(&LookalikeUrlNavigationThrottle::PerformChecksDeferred,
weak_factory_.GetWeakPtr(), url, navigated_domain));
weak_factory_.GetWeakPtr(), url, navigated_domain,
check_safe_redirect));
// If we're not going to show an interstitial, there's no reason to delay
// the navigation any further.
if (!interstitials_enabled_) {
......@@ -262,14 +274,17 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::HandleThrottleRequest(
return content::NavigationThrottle::DEFER;
}
return PerformChecks(url, navigated_domain, service->GetLatestEngagedSites());
return PerformChecks(url, navigated_domain, check_safe_redirect,
service->GetLatestEngagedSites());
}
ThrottleCheckResult LookalikeUrlNavigationThrottle::WillProcessResponse() {
if (navigation_handle()->GetNetErrorCode() != net::OK) {
return content::NavigationThrottle::PROCEED;
}
return HandleThrottleRequest(navigation_handle()->GetURL());
// Do not check for if the redirect was safe. That should only be done when
// the navigation is still being redirected.
return HandleThrottleRequest(navigation_handle()->GetURL(), false);
}
ThrottleCheckResult LookalikeUrlNavigationThrottle::WillRedirectRequest() {
......@@ -283,7 +298,7 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::WillRedirectRequest() {
if (chain.size() < 2) {
return content::NavigationThrottle::PROCEED;
}
return HandleThrottleRequest(chain[chain.size() - 2]);
return HandleThrottleRequest(chain[chain.size() - 2], true);
}
const char* LookalikeUrlNavigationThrottle::GetNameForLogging() {
......@@ -331,9 +346,10 @@ LookalikeUrlNavigationThrottle::MaybeCreateNavigationThrottle(
void LookalikeUrlNavigationThrottle::PerformChecksDeferred(
const GURL& url,
const DomainInfo& navigated_domain,
bool check_safe_redirect,
const std::vector<DomainInfo>& engaged_sites) {
ThrottleCheckResult result =
PerformChecks(url, navigated_domain, engaged_sites);
PerformChecks(url, navigated_domain, check_safe_redirect, engaged_sites);
if (!interstitials_enabled_) {
return;
......@@ -350,6 +366,7 @@ void LookalikeUrlNavigationThrottle::PerformChecksDeferred(
ThrottleCheckResult LookalikeUrlNavigationThrottle::PerformChecks(
const GURL& url,
const DomainInfo& navigated_domain,
bool check_safe_redirect,
const std::vector<DomainInfo>& engaged_sites) {
std::string matched_domain;
MatchType match_type;
......@@ -376,6 +393,11 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::PerformChecks(
}
DCHECK(!matched_domain.empty());
if (check_safe_redirect &&
IsSafeRedirect(matched_domain, navigation_handle()->GetRedirectChain())) {
return content::NavigationThrottle::PROCEED;
}
ukm::SourceId source_id = ukm::ConvertToSourceId(
navigation_handle()->GetNavigationId(), ukm::SourceIdType::NAVIGATION_ID);
......
......@@ -32,6 +32,14 @@ struct DomainInfo;
bool IsEditDistanceAtMostOne(const base::string16& str1,
const base::string16& str2);
// Returns true if the redirect is deemed to be safe. These are generally
// defensive registrations where the domain owner redirects the IDN to the ASCII
// domain. See the unit tests for examples.
// In short, |url| must redirect to the root of |safe_url_host| or one
// of its subdomains.
bool IsSafeRedirect(const std::string& safe_url_host,
const std::vector<GURL>& redirect_chain);
// Observes navigations and shows an interstitial if the navigated domain name
// is visually similar to a top domain or a domain with a site engagement score.
class LookalikeUrlNavigationThrottle : public content::NavigationThrottle {
......@@ -69,18 +77,23 @@ class LookalikeUrlNavigationThrottle : public content::NavigationThrottle {
FRIEND_TEST_ALL_PREFIXES(LookalikeUrlNavigationThrottleTest,
IsEditDistanceAtMostOne);
ThrottleCheckResult HandleThrottleRequest(const GURL& url);
// Checks whether the navigation to |url| can proceed. If
// |check_safe_redirect| is true, will check if a safe redirect led to |url|.
ThrottleCheckResult HandleThrottleRequest(const GURL& url,
bool check_safe_redirect);
// Performs synchronous top domain and engaged site checks on the navigated
// |url|. Uses |engaged_sites| for the engaged site checks.
ThrottleCheckResult PerformChecks(
const GURL& url,
const DomainInfo& navigated_domain,
bool check_safe_redirect,
const std::vector<DomainInfo>& engaged_sites);
// A void-returning variant, only used with deferred throttle results.
void PerformChecksDeferred(const GURL& url,
const DomainInfo& navigated_domain,
bool check_safe_redirect,
const std::vector<DomainInfo>& engaged_sites);
bool ShouldDisplayInterstitial(
......
......@@ -678,6 +678,37 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
}
}
// The site redirects to the matched site, this should not show
// an interstitial.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
Idn_SiteEngagement_SafeRedirect) {
const GURL kExpectedSuggestedUrl = GetURLWithoutPath("site1.com");
const GURL kNavigatedUrl = embedded_test_server()->GetURL(
"sité1.com", "/server-redirect?" + kExpectedSuggestedUrl.spec());
SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
SetEngagementScore(browser(), kExpectedSuggestedUrl, kHighEngagement);
TestInterstitialNotShown(browser(), kNavigatedUrl);
}
// The site redirects to the matched site, but the redirect chain has more than
// two redirects.
// TODO(meacer): Consider allowing this case.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
Idn_SiteEngagement_UnsafeRedirect) {
const GURL kExpectedSuggestedUrl = GetURLWithoutPath("site1.com");
const GURL kMidUrl = embedded_test_server()->GetURL(
"sité1.com", "/server-redirect?" + kExpectedSuggestedUrl.spec());
const GURL kNavigatedUrl = embedded_test_server()->GetURL(
"other-site.test", "/server-redirect?" + kMidUrl.spec());
SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
SetEngagementScore(browser(), kExpectedSuggestedUrl, kHighEngagement);
TestMetricsRecordedAndMaybeInterstitialShown(
browser(), kNavigatedUrl, kExpectedSuggestedUrl,
NavigationSuggestionEvent::kMatchSiteEngagement);
}
// Tests negative examples for all heuristics.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
NonUniqueDomains_NoMatch) {
......
......@@ -68,4 +68,37 @@ TEST(LookalikeUrlNavigationThrottleTest, IsEditDistanceAtMostOne) {
}
}
// These redirects are safe:
// - http[s]://sité.test -> http[s]://site.test
// - http[s]://sité.test/path -> http[s]://site.test
// - http[s]://subdomain.sité.test -> http[s]://site.test
// - http[s]://random.test -> http[s]://sité.test -> http[s]://site.test
// - "subdomain" on either side.
// This is not safe:
// - http[s]://[subdomain.]sité.test -> http[s]://[subdomain.]site.test/path
// because the redirected URL has a path.
TEST(LookalikeUrlNavigationThrottleTest, IsSafeRedirect) {
EXPECT_TRUE(IsSafeRedirect(
"example.com", {GURL("http://éxample.com"), GURL("http://example.com")}));
EXPECT_TRUE(IsSafeRedirect(
"example.com", {GURL("http://éxample.com"), GURL("http://example.com")}));
EXPECT_TRUE(IsSafeRedirect(
"example.com",
{GURL("http://éxample.com"), GURL("http://subdomain.example.com")}));
// Not a redirect, the chain is too short.
EXPECT_FALSE(IsSafeRedirect("example.com", {GURL("http://éxample.com")}));
// Not safe: Redirected site is not the same as the matched site.
EXPECT_FALSE(IsSafeRedirect("example.com", {GURL("http://éxample.com"),
GURL("http://other-site.com")}));
// Not safe: Initial URL doesn't redirect to the root of the suggested domain.
EXPECT_FALSE(IsSafeRedirect(
"example.com",
{GURL("http://éxample.com"), GURL("http://example.com/path")}));
// Not safe: The chain is too long.
EXPECT_FALSE(IsSafeRedirect("example.com", {GURL("http://éxample.com"),
GURL("http://intermediate.com"),
GURL("http://example.com")}));
}
} // namespace lookalikes
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment