Commit 2a877987 authored by Mustafa Emre Acer's avatar Mustafa Emre Acer Committed by Commit Bot

Allow IDN labels for subdomains of top domains

A previous fix introduced a top domain list to prevent phishing
attempts using IDN. That fix compares a domain against a top domain,
removes a label, and checks again until only a single label is left.
This is problematic because it assumes subdomains of top domains
cannot be IDN. As a result, subdomains of top domains are not decoded
as unicode.

This CL fixes this problem by adding an extra test to check if the
hostname is a top domain itself or a subdomain of a top domain. If true,
it's not treated as a lookalike and decoded as unicode.

Bug: 769547
Change-Id: Id3b0589128bce974f6789c4151738ad74ca0ba6a
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1320813Reviewed-by: default avatarJoe DeBlasio <jdeblasio@chromium.org>
Reviewed-by: default avatarTommy Li <tommycli@chromium.org>
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#747860}
parent 1b4bfc5f
...@@ -117,6 +117,15 @@ bool HasUnsafeMiddleDot(const icu::UnicodeString& label_string, ...@@ -117,6 +117,15 @@ bool HasUnsafeMiddleDot(const icu::UnicodeString& label_string,
return false; return false;
} }
bool IsSubdomainOf(base::StringPiece16 hostname,
const base::string16& top_domain) {
DCHECK_NE(hostname, top_domain);
DCHECK(!hostname.empty());
DCHECK(!top_domain.empty());
return base::EndsWith(hostname, base::ASCIIToUTF16(".") + top_domain,
base::CompareCase::INSENSITIVE_ASCII);
}
#include "components/url_formatter/spoof_checks/top_domains/domains-trie-inc.cc" #include "components/url_formatter/spoof_checks/top_domains/domains-trie-inc.cc"
// All the domains in the above file have 4 or fewer labels. // All the domains in the above file have 4 or fewer labels.
...@@ -508,6 +517,15 @@ TopDomainEntry IDNSpoofChecker::GetSimilarTopDomain( ...@@ -508,6 +517,15 @@ TopDomainEntry IDNSpoofChecker::GetSimilarTopDomain(
DCHECK(!skeleton.empty()); DCHECK(!skeleton.empty());
TopDomainEntry matching_top_domain = LookupSkeletonInTopDomains(skeleton); TopDomainEntry matching_top_domain = LookupSkeletonInTopDomains(skeleton);
if (!matching_top_domain.domain.empty()) { if (!matching_top_domain.domain.empty()) {
const base::string16 top_domain =
base::UTF8ToUTF16(matching_top_domain.domain);
// Return an empty result if hostname is a top domain itself, or a
// subdomain of top domain. This prevents subdomains of top domains from
// being marked as spoofs. Without this check, éxample.blogspot.com
// would return blogspot.com and treated as a top domain lookalike.
if (hostname == top_domain || IsSubdomainOf(hostname, top_domain)) {
return TopDomainEntry();
}
return matching_top_domain; return matching_top_domain;
} }
} }
......
...@@ -68,8 +68,9 @@ class IDNSpoofChecker { ...@@ -68,8 +68,9 @@ class IDNSpoofChecker {
base::StringPiece16 top_level_domain_unicode); base::StringPiece16 top_level_domain_unicode);
// Returns the matching top domain if |hostname| or the last few components of // Returns the matching top domain if |hostname| or the last few components of
// |hostname| looks similar to one of top domains listed i // |hostname| looks similar to one of top domains listed in domains.list.
// top_domains/alexa_domains.list. // Returns empty result if |hostname| is a top domain itself, or is a
// subdomain of a top domain.
// Two checks are done: // Two checks are done:
// 1. Calculate the skeleton of |hostname| based on the Unicode confusable // 1. Calculate the skeleton of |hostname| based on the Unicode confusable
// character list and look it up in the pre-calculated skeleton list of // character list and look it up in the pre-calculated skeleton list of
......
...@@ -1264,6 +1264,12 @@ const IDNTestCase kIdnCases[] = { ...@@ -1264,6 +1264,12 @@ const IDNTestCase kIdnCases[] = {
// IDN domain matching an IDN top-domain (fóó.com) // IDN domain matching an IDN top-domain (fóó.com)
{"xn--fo-5ja.com", L"fóo.com", kUnsafe}, {"xn--fo-5ja.com", L"fóo.com", kUnsafe},
// crbug.com/769547: Subdomains of top domains should be allowed.
{"xn--xample-9ua.test.net", L"éxample.test.net", kSafe},
// Skeleton of the eTLD+1 matches a top domain, but the eTLD+1 itself is
// not a top domain. Should not be decoded to unicode.
{"xn--xample-9ua.test.xn--nt-bja", L"éxample.test.nét", kUnsafe},
}; };
namespace test { namespace test {
...@@ -1332,6 +1338,29 @@ TEST_F(IDNSpoofCheckerTest, IDNToUnicode) { ...@@ -1332,6 +1338,29 @@ TEST_F(IDNSpoofCheckerTest, IDNToUnicode) {
} }
} }
TEST_F(IDNSpoofCheckerTest, GetSimilarTopDomain) {
struct TestCase {
const wchar_t* const hostname;
const char* const expected_top_domain;
} kTestCases[] = {
{L"tést.net", "test.net"},
{L"subdomain.tést.net", "test.net"},
// A top domain should not return a similar top domain result.
{L"test.net", ""},
// A subdomain of a top domain should not return a similar top domain
// result.
{L"subdomain.test.net", ""},
// An IDN subdomain of a top domain should not return a similar top domain
// result.
{L"subdómain.test.net", ""}};
for (const TestCase& test_case : kTestCases) {
const TopDomainEntry entry = IDNSpoofChecker().GetSimilarTopDomain(
base::WideToUTF16(test_case.hostname));
EXPECT_EQ(test_case.expected_top_domain, entry.domain);
EXPECT_FALSE(entry.is_top_500);
}
}
TEST_F(IDNSpoofCheckerTest, LookupSkeletonInTopDomains) { TEST_F(IDNSpoofCheckerTest, LookupSkeletonInTopDomains) {
{ {
TopDomainEntry entry = TopDomainEntry entry =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment