Commit 4f864da9 authored by Mustafa Emre Acer, committed by Commit Bot

Add failure reason to IDN spoof checks

This CL adds a new enum (IDNSpoofChecker::Result) and returns it from
IDN spoof checking code. This result will then be used to convert
some punycode fallback cases to other types of UI (interstitials or
safety tips).

Bug: 1095120
Change-Id: I68c1b3c639edc3f71a0f2c4d3d5adbce7a689c9e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2246833
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Reviewed-by: Peter Kasting <pkasting@chromium.org>
Reviewed-by: Joe DeBlasio <jdeblasio@chromium.org>
Cr-Commit-Position: refs/heads/master@{#779095}
parent a48f280b
...@@ -353,7 +353,7 @@ IDNSpoofChecker::~IDNSpoofChecker() { ...@@ -353,7 +353,7 @@ IDNSpoofChecker::~IDNSpoofChecker() {
uspoof_close(checker_); uspoof_close(checker_);
} }
bool IDNSpoofChecker::SafeToDisplayAsUnicode( IDNSpoofChecker::Result IDNSpoofChecker::SafeToDisplayAsUnicode(
base::StringPiece16 label, base::StringPiece16 label,
base::StringPiece top_level_domain, base::StringPiece top_level_domain,
base::StringPiece16 top_level_domain_unicode) { base::StringPiece16 top_level_domain_unicode) {
...@@ -363,8 +363,9 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( ...@@ -363,8 +363,9 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
base::checked_cast<int32_t>(label.size()), nullptr, &status); base::checked_cast<int32_t>(label.size()), nullptr, &status);
// If uspoof_check fails (due to library failure), or if any of the checks // If uspoof_check fails (due to library failure), or if any of the checks
// fail, treat the IDN as unsafe. // fail, treat the IDN as unsafe.
if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS)) if (U_FAILURE(status) || (result & USPOOF_ALL_CHECKS)) {
return false; return Result::kICUSpoofChecks;
}
icu::UnicodeString label_string(FALSE /* isTerminated */, label.data(), icu::UnicodeString label_string(FALSE /* isTerminated */, label.data(),
base::checked_cast<int32_t>(label.size())); base::checked_cast<int32_t>(label.size()));
...@@ -381,21 +382,21 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( ...@@ -381,21 +382,21 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
// as Unicode would be canonicalized to 'fuss' by GURL and is displayed as // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as
// such. See http://crbug.com/595263 . // such. See http://crbug.com/595263 .
if (deviation_characters_.containsSome(label_string)) if (deviation_characters_.containsSome(label_string))
return false; return Result::kDeviationCharacters;
// Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is). // Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is).
if (label_string.length() > 1 && top_level_domain != "is" && if (label_string.length() > 1 && top_level_domain != "is" &&
icelandic_characters_.containsSome(label_string)) icelandic_characters_.containsSome(label_string))
return false; return Result::kTLDSpecificCharacters;
// Disallow Latin Schwa (U+0259) for domains outside Azerbaijan's ccTLD (.az). // Disallow Latin Schwa (U+0259) for domains outside Azerbaijan's ccTLD (.az).
if (label_string.length() > 1 && top_level_domain != "az" && if (label_string.length() > 1 && top_level_domain != "az" &&
label_string.indexOf("ə") != -1) label_string.indexOf("ə") != -1)
return false; return Result::kTLDSpecificCharacters;
// Disallow middle dot (U+00B7) when unsafe. // Disallow middle dot (U+00B7) when unsafe.
if (HasUnsafeMiddleDot(label_string, top_level_domain)) { if (HasUnsafeMiddleDot(label_string, top_level_domain)) {
return false; return Result::kUnsafeMiddleDot;
} }
// If there's no script mixing, the input is regarded as safe without any // If there's no script mixing, the input is regarded as safe without any
...@@ -411,7 +412,8 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( ...@@ -411,7 +412,8 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
// - Korean: Hangul, Han, Common // - Korean: Hangul, Han, Common
result &= USPOOF_RESTRICTION_LEVEL_MASK; result &= USPOOF_RESTRICTION_LEVEL_MASK;
if (result == USPOOF_ASCII) if (result == USPOOF_ASCII)
return true; return Result::kSafe;
if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE && if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE &&
kana_letters_exceptions_.containsNone(label_string) && kana_letters_exceptions_.containsNone(label_string) &&
combining_diacritics_exceptions_.containsNone(label_string)) { combining_diacritics_exceptions_.containsNone(label_string)) {
...@@ -419,15 +421,15 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( ...@@ -419,15 +421,15 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
if (IsLabelWholeScriptConfusableForScript(*script.get(), label_string) && if (IsLabelWholeScriptConfusableForScript(*script.get(), label_string) &&
!IsWholeScriptConfusableAllowedForTLD(*script.get(), top_level_domain, !IsWholeScriptConfusableAllowedForTLD(*script.get(), top_level_domain,
top_level_domain_unicode)) { top_level_domain_unicode)) {
return false; return Result::kWholeScriptConfusable;
} }
} }
return true; return Result::kSafe;
} }
// Disallow domains that contain only numbers and number-spoofs. // Disallow domains that contain only numbers and number-spoofs.
if (IsDigitLookalike(label_string)) if (IsDigitLookalike(label_string))
return false; return Result::kDigitLookalikes;
// Additional checks for |label| with multiple scripts, one of which is Latin. // Additional checks for |label| with multiple scripts, one of which is Latin.
// Disallow non-ASCII Latin letters to mix with a non-Latin script. // Disallow non-ASCII Latin letters to mix with a non-Latin script.
...@@ -436,7 +438,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( ...@@ -436,7 +438,7 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
// because script mixing of LGC is already rejected. // because script mixing of LGC is already rejected.
if (non_ascii_latin_letters_.containsSome(label_string) && if (non_ascii_latin_letters_.containsSome(label_string) &&
!lgc_letters_n_ascii_.containsAll(label_string)) !lgc_letters_n_ascii_.containsAll(label_string))
return false; return Result::kNonAsciiLatinCharMixedWithNonLatin;
icu::RegexMatcher* dangerous_pattern = icu::RegexMatcher* dangerous_pattern =
reinterpret_cast<icu::RegexMatcher*>(DangerousPatternTLS().Get()); reinterpret_cast<icu::RegexMatcher*>(DangerousPatternTLS().Get());
...@@ -526,7 +528,10 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode( ...@@ -526,7 +528,10 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
DangerousPatternTLS().Set(dangerous_pattern); DangerousPatternTLS().Set(dangerous_pattern);
} }
dangerous_pattern->reset(label_string); dangerous_pattern->reset(label_string);
return !dangerous_pattern->find(); if (dangerous_pattern->find()) {
return Result::kDangerousPattern;
}
return Result::kSafe;
} }
TopDomainEntry IDNSpoofChecker::GetSimilarTopDomain( TopDomainEntry IDNSpoofChecker::GetSimilarTopDomain(
......
...@@ -74,17 +74,50 @@ class IDNSpoofChecker { ...@@ -74,17 +74,50 @@ class IDNSpoofChecker {
size_t trie_root_position; size_t trie_root_position;
}; };
// Outcome of the IDN spoof checks, as returned by SafeToDisplayAsUnicode.
// Every value other than kNone and kSafe identifies the check that rejected
// the input.
enum class Result {
// Spoof checks weren't performed because the domain wasn't IDN. Should
// never be returned from SafeToDisplayAsUnicode.
kNone,
// The domain passed all spoof checks.
kSafe,
// Failed ICU's standard spoof checks such as Greek mixing with Latin. Also
// returned when the uspoof_check call itself reports a failure, so that
// library failures are treated as unsafe.
kICUSpoofChecks,
// Domain contains deviation characters.
kDeviationCharacters,
// Domain contains characters that are only allowed for certain TLDs, such
// as thorn (þ) used outside Icelandic, or Latin Schwa (U+0259) used outside
// Azerbaijan's ccTLD (.az).
kTLDSpecificCharacters,
// Domain has an unsafe middle dot (U+00B7).
kUnsafeMiddleDot,
// Domain is composed of only Latin-like characters from non-Latin scripts.
// E.g. apple.com but apple in Cyrillic (xn--80ak6aa92e.com).
kWholeScriptConfusable,
// Domain is composed of only characters that look like digits.
kDigitLookalikes,
// Domain mixes non-ASCII Latin with non-Latin characters.
kNonAsciiLatinCharMixedWithNonLatin,
// Domain contains dangerous patterns that are mostly found when mixing
// Latin and CJK scripts. E.g. Katakana iteration mark (U+30FD) not preceded
// by Katakana.
kDangerousPattern,
};
IDNSpoofChecker(); IDNSpoofChecker();
~IDNSpoofChecker(); ~IDNSpoofChecker();
// Returns kSafe if |label| is safe to display as Unicode and fills
// Returns true if |label| is safe to display as Unicode. In the event of // |top_level_domain_unicode| with the converted value. Otherwise, returns the
// library failure, all IDN inputs will be treated as unsafe. // reason of the failure and leaves |top_level_domain_unicode| unchanged.
// This method doesn't check for similarity to a top domain: If the input
// matches a top domain but is otherwise safe (e.g. googlé.com), the result
// will be kSafe.
// In the event of library failure, all IDN inputs will be treated as unsafe
// and the return value will be kICUSpoofChecks.
// See the function body for details on the specific safety checks performed. // See the function body for details on the specific safety checks performed.
// top_level_domain_unicode can be empty if top_level_domain is not well // |top_level_domain_unicode| can be empty if |top_level_domain| is not well
// formed punycode. // formed punycode.
bool SafeToDisplayAsUnicode(base::StringPiece16 label, Result SafeToDisplayAsUnicode(base::StringPiece16 label,
base::StringPiece top_level_domain, base::StringPiece top_level_domain,
base::StringPiece16 top_level_domain_unicode); base::StringPiece16 top_level_domain_unicode);
// Returns the matching top domain if |hostname| or the last few components of // Returns the matching top domain if |hostname| or the last few components of
// |hostname| looks similar to one of top domains listed in domains.list. // |hostname| looks similar to one of top domains listed in domains.list.
......
...@@ -1428,6 +1428,7 @@ TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) { ...@@ -1428,6 +1428,7 @@ TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) {
const char* const expected_matching_domain; const char* const expected_matching_domain;
// If true, the matching top domain is expected to be in top 500. // If true, the matching top domain is expected to be in top 500.
const bool expected_is_top_500; const bool expected_is_top_500;
const IDNSpoofChecker::Result expected_spoof_check_result;
} kTestCases[] = { } kTestCases[] = {
{// An ASCII, top domain. {// An ASCII, top domain.
"google.com", L"google.com", false, "google.com", L"google.com", false,
...@@ -1435,22 +1436,25 @@ TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) { ...@@ -1435,22 +1436,25 @@ TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) {
"", "",
// ...And since we don't match it to a top domain, we don't know if it's // ...And since we don't match it to a top domain, we don't know if it's
// a top 500 domain. // a top 500 domain.
false}, false, IDNSpoofChecker::Result::kNone},
{// An ASCII domain that's not a top domain. {// An ASCII domain that's not a top domain.
"not-top-domain.com", L"not-top-domain.com", false, "", false}, "not-top-domain.com", L"not-top-domain.com", false, "", false,
IDNSpoofChecker::Result::kNone},
{// A unicode domain that's valid according to all of the rules in IDN {// A unicode domain that's valid according to all of the rules in IDN
// spoof checker except that it matches a top domain. Should be // spoof checker except that it matches a top domain. Should be
// converted to punycode. // converted to punycode. Spoof check result is kSafe because top domain
"xn--googl-fsa.com", L"googlé.com", true, "google.com", true}, // similarity isn't included in IDNSpoofChecker::Result.
"xn--googl-fsa.com", L"googlé.com", true, "google.com", true,
IDNSpoofChecker::Result::kSafe},
{// A unicode domain that's not valid according to the rules in IDN spoof {// A unicode domain that's not valid according to the rules in IDN spoof
// checker (mixed script) and it matches a top domain. Should be // checker (whole script confusable in Cyrillic) and it matches a top
// converted to punycode. // domain. Should be converted to punycode.
"xn--80ak6aa92e.com", L"аррӏе.com", true, "apple.com", true}, "xn--80ak6aa92e.com", L"аррӏе.com", true, "apple.com", true,
IDNSpoofChecker::Result::kWholeScriptConfusable},
{// A unicode domain that's not valid according to the rules in IDN spoof {// A unicode domain that's not valid according to the rules in IDN spoof
// checker (mixed script) but it doesn't match a top domain. // checker (mixed script) but it doesn't match a top domain.
"xn--o-o-oai-26a223aia177a7ab7649d.com", L"ɴoτ-τoρ-ďoᛖaiɴ.com", true, "", "xn--o-o-oai-26a223aia177a7ab7649d.com", L"ɴoτ-τoρ-ďoᛖaiɴ.com", true, "",
false}, false, IDNSpoofChecker::Result::kICUSpoofChecks}};
};
for (const TestCase& test_case : kTestCases) { for (const TestCase& test_case : kTestCases) {
const url_formatter::IDNConversionResult result = const url_formatter::IDNConversionResult result =
...@@ -1461,6 +1465,7 @@ TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) { ...@@ -1461,6 +1465,7 @@ TEST(IDNSpoofCheckerNoFixtureTest, UnsafeIDNToUnicodeWithDetails) {
result.matching_top_domain.domain); result.matching_top_domain.domain);
EXPECT_EQ(test_case.expected_is_top_500, EXPECT_EQ(test_case.expected_is_top_500,
result.matching_top_domain.is_top_500); result.matching_top_domain.is_top_500);
EXPECT_EQ(test_case.expected_spoof_check_result, result.spoof_check_result);
} }
} }
......
...@@ -31,13 +31,23 @@ IDNConversionResult IDNToUnicodeWithAdjustments( ...@@ -31,13 +31,23 @@ IDNConversionResult IDNToUnicodeWithAdjustments(
base::StringPiece host, base::StringPiece host,
base::OffsetAdjuster::Adjustments* adjustments); base::OffsetAdjuster::Adjustments* adjustments);
bool IDNToUnicodeOneComponent(const base::char16* comp, // Result of converting a single IDN component (i.e. label) to unicode.
size_t comp_len, struct ComponentResult {
base::StringPiece top_level_domain, // Set to true if the component is converted to unicode.
base::StringPiece16 top_level_domain_unicode, bool converted = false;
bool enable_spoof_checks, // Set to true if the component is IDN, even if it's not converted to unicode.
base::string16* out, bool has_idn_component = false;
bool* has_idn_component); // Result of the IDN spoof check.
IDNSpoofChecker::Result spoof_check_result = IDNSpoofChecker::Result::kNone;
};
ComponentResult IDNToUnicodeOneComponent(
const base::char16* comp,
size_t comp_len,
base::StringPiece top_level_domain,
base::StringPiece16 top_level_domain_unicode,
bool ignore_spoof_check_results,
base::string16* out);
class AppendComponentTransform { class AppendComponentTransform {
public: public:
...@@ -249,17 +259,17 @@ void GetTopLevelDomain(base::StringPiece host, ...@@ -249,17 +259,17 @@ void GetTopLevelDomain(base::StringPiece host,
tld16.reserve(top_level_domain->length()); tld16.reserve(top_level_domain->length());
tld16.insert(tld16.end(), top_level_domain->begin(), top_level_domain->end()); tld16.insert(tld16.end(), top_level_domain->begin(), top_level_domain->end());
// Convert the TLD to unicode with the spoof checks disabled. // Convert the TLD to unicode, ignoring the spoof check results. This will
bool tld_has_idn_component = false; // always decode the input to unicode as long as it's valid punycode.
IDNToUnicodeOneComponent(tld16.data(), tld16.size(), std::string(), IDNToUnicodeOneComponent(
base::string16(), false /* enable_spoof_checks */, tld16.data(), tld16.size(), std::string(), base::string16(),
top_level_domain_unicode, &tld_has_idn_component); /*ignore_spoof_check_results=*/true, top_level_domain_unicode);
} }
IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
base::StringPiece host, base::StringPiece host,
base::OffsetAdjuster::Adjustments* adjustments, base::OffsetAdjuster::Adjustments* adjustments,
bool enable_spoof_checks) { bool ignore_spoof_check_results) {
if (adjustments) if (adjustments)
adjustments->clear(); adjustments->clear();
// Convert the ASCII input to a base::string16 for ICU. // Convert the ASCII input to a base::string16 for ICU.
...@@ -284,19 +294,23 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( ...@@ -284,19 +294,23 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
component_end = host16.length(); // For getting the last component. component_end = host16.length(); // For getting the last component.
size_t component_length = component_end - component_start; size_t component_length = component_end - component_start;
size_t new_component_start = out16.length(); size_t new_component_start = out16.length();
bool converted_idn = false; ComponentResult component_result;
if (component_end > component_start) { if (component_end > component_start) {
// Add the substring that we just found. // Add the substring that we just found.
bool has_idn_component = false; component_result = IDNToUnicodeOneComponent(
converted_idn = IDNToUnicodeOneComponent(
host16.data() + component_start, component_length, top_level_domain, host16.data() + component_start, component_length, top_level_domain,
top_level_domain_unicode, enable_spoof_checks, &out16, top_level_domain_unicode, ignore_spoof_check_results, &out16);
&has_idn_component); result.has_idn_component |= component_result.has_idn_component;
result.has_idn_component |= has_idn_component; if (component_result.spoof_check_result !=
IDNSpoofChecker::Result::kNone &&
(result.spoof_check_result == IDNSpoofChecker::Result::kNone ||
result.spoof_check_result == IDNSpoofChecker::Result::kSafe)) {
result.spoof_check_result = component_result.spoof_check_result;
}
} }
size_t new_component_length = out16.length() - new_component_start; size_t new_component_length = out16.length() - new_component_start;
if (converted_idn && adjustments) { if (component_result.converted && adjustments) {
adjustments->push_back(base::OffsetAdjuster::Adjustment( adjustments->push_back(base::OffsetAdjuster::Adjustment(
component_start, component_length, new_component_length)); component_start, component_length, new_component_length));
} }
...@@ -312,7 +326,8 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( ...@@ -312,7 +326,8 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
if (result.has_idn_component) { if (result.has_idn_component) {
result.matching_top_domain = result.matching_top_domain =
g_idn_spoof_checker.Get().GetSimilarTopDomain(out16); g_idn_spoof_checker.Get().GetSimilarTopDomain(out16);
if (enable_spoof_checks && !result.matching_top_domain.domain.empty()) { if (!ignore_spoof_check_results &&
!result.matching_top_domain.domain.empty()) {
if (adjustments) if (adjustments)
adjustments->clear(); adjustments->clear();
result.result = host16; result.result = host16;
...@@ -327,22 +342,25 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl( ...@@ -327,22 +342,25 @@ IDNConversionResult IDNToUnicodeWithAdjustmentsImpl(
IDNConversionResult IDNToUnicodeWithAdjustments( IDNConversionResult IDNToUnicodeWithAdjustments(
base::StringPiece host, base::StringPiece host,
base::OffsetAdjuster::Adjustments* adjustments) { base::OffsetAdjuster::Adjustments* adjustments) {
return IDNToUnicodeWithAdjustmentsImpl(host, adjustments, true); return IDNToUnicodeWithAdjustmentsImpl(host, adjustments,
/*ignore_spoof_check_results=*/false);
} }
IDNConversionResult UnsafeIDNToUnicodeWithAdjustments( IDNConversionResult UnsafeIDNToUnicodeWithAdjustments(
base::StringPiece host, base::StringPiece host,
base::OffsetAdjuster::Adjustments* adjustments) { base::OffsetAdjuster::Adjustments* adjustments) {
return IDNToUnicodeWithAdjustmentsImpl(host, adjustments, false); return IDNToUnicodeWithAdjustmentsImpl(host, adjustments,
/*ignore_spoof_check_results=*/true);
} }
// Returns true if the given Unicode host component is safe to display to the // Returns true if the given Unicode host component is safe to display to the
// user. Note that this function does not deal with pure ASCII domain labels at // user. Note that this function does not deal with pure ASCII domain labels at
// all even though it's possible to make up look-alike labels with ASCII // all even though it's possible to make up look-alike labels with ASCII
// characters alone. // characters alone.
bool IsIDNComponentSafe(base::StringPiece16 label, IDNSpoofChecker::Result SpoofCheckIDNComponent(
base::StringPiece top_level_domain, base::StringPiece16 label,
base::StringPiece16 top_level_domain_unicode) { base::StringPiece top_level_domain,
base::StringPiece16 top_level_domain_unicode) {
return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode( return g_idn_spoof_checker.Get().SafeToDisplayAsUnicode(
label, top_level_domain, top_level_domain_unicode); label, top_level_domain, top_level_domain_unicode);
} }
...@@ -387,25 +405,23 @@ struct UIDNAWrapper { ...@@ -387,25 +405,23 @@ struct UIDNAWrapper {
base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER; base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = LAZY_INSTANCE_INITIALIZER;
// Converts one component (label) of a host (between dots) to Unicode if safe. // Converts one component (label) of a host (between dots) to Unicode if safe.
// If |enable_spoof_checks| is false and input is valid unicode, skips spoof // If |ignore_spoof_check_results| is true and input is valid unicode, ignores
// checks and always converts to unicode. // spoof check results and always converts the input to unicode. The result will
// The result will be APPENDED to the given output string and will be the // be APPENDED to the given output string and will be the same as the input if
// same as the input if it is not IDN in ACE/punycode or the IDN is unsafe to // it is not IDN in ACE/punycode or the IDN is unsafe to display. Returns true
// display. // if conversion was made. Sets |has_idn_component| to true if the input has
// Returns true if conversion was made. Sets |has_idn_component| to true if the // IDN, regardless of whether it was converted to unicode or not.
// input has IDN, regardless of whether it was converted to unicode or not. ComponentResult IDNToUnicodeOneComponent(
bool IDNToUnicodeOneComponent(const base::char16* comp, const base::char16* comp,
size_t comp_len, size_t comp_len,
base::StringPiece top_level_domain, base::StringPiece top_level_domain,
base::StringPiece16 top_level_domain_unicode, base::StringPiece16 top_level_domain_unicode,
bool enable_spoof_checks, bool ignore_spoof_check_results,
base::string16* out, base::string16* out) {
bool* has_idn_component) {
DCHECK(out); DCHECK(out);
DCHECK(has_idn_component); ComponentResult result;
*has_idn_component = false;
if (comp_len == 0) if (comp_len == 0)
return false; return result;
// Early return if the input cannot be an IDN component. // Early return if the input cannot be an IDN component.
// Valid punycode must not end with a dash. // Valid punycode must not end with a dash.
...@@ -414,7 +430,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, ...@@ -414,7 +430,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp,
memcmp(comp, kIdnPrefix, sizeof(kIdnPrefix)) != 0 || memcmp(comp, kIdnPrefix, sizeof(kIdnPrefix)) != 0 ||
comp[comp_len - 1] == '-') { comp[comp_len - 1] == '-') {
out->append(comp, comp_len); out->append(comp, comp_len);
return false; return result;
} }
UIDNA* uidna = g_uidna.Get().value; UIDNA* uidna = g_uidna.Get().value;
...@@ -435,20 +451,21 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, ...@@ -435,20 +451,21 @@ bool IDNToUnicodeOneComponent(const base::char16* comp,
} while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0)); } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));
if (U_SUCCESS(status) && info.errors == 0) { if (U_SUCCESS(status) && info.errors == 0) {
*has_idn_component = true; result.has_idn_component = true;
// Converted successfully. At this point the length of the output string // Converted successfully. At this point the length of the output string
// is original_length + output_length which may be shorter than the current // is original_length + output_length which may be shorter than the current
// length of |out|. Trim |out| and ensure that the converted component can // length of |out|. Trim |out| and ensure that the converted component can
// be safely displayed to the user. // be safely displayed to the user.
out->resize(original_length + output_length); out->resize(original_length + output_length);
if (!enable_spoof_checks) { result.spoof_check_result = SpoofCheckIDNComponent(
return true; base::StringPiece16(out->data() + original_length,
} base::checked_cast<size_t>(output_length)),
if (IsIDNComponentSafe( top_level_domain, top_level_domain_unicode);
base::StringPiece16(out->data() + original_length, DCHECK_NE(IDNSpoofChecker::Result::kNone, result.spoof_check_result);
base::checked_cast<size_t>(output_length)), if (ignore_spoof_check_results ||
top_level_domain, top_level_domain_unicode)) { result.spoof_check_result == IDNSpoofChecker::Result::kSafe) {
return true; result.converted = true;
return result;
} }
} }
...@@ -456,7 +473,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, ...@@ -456,7 +473,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp,
// original string and append the literal input. // original string and append the literal input.
out->resize(original_length); out->resize(original_length);
out->append(comp, comp_len); out->append(comp, comp_len);
return false; return result;
} }
} // namespace } // namespace
......
...@@ -56,6 +56,12 @@ struct IDNConversionResult { ...@@ -56,6 +56,12 @@ struct IDNConversionResult {
// E.g. IDNToUnicodeWithDetails("googlé.com") will fill |result| with // E.g. IDNToUnicodeWithDetails("googlé.com") will fill |result| with
// "xn--googl-fsa.com" and |matching_top_domain.domain| with "google.com". // "xn--googl-fsa.com" and |matching_top_domain.domain| with "google.com".
TopDomainEntry matching_top_domain; TopDomainEntry matching_top_domain;
// Result of the spoof check. If the domain was converted to unicode, this
// must be kSafe. Otherwise, this will be the failure reason
// for the domain component (i.e. label) that failed the spoof checks. If
// multiple labels fail the checks, this will be the result of the first
// component that failed, counting from the left in the punycode form.
IDNSpoofChecker::Result spoof_check_result = IDNSpoofChecker::Result::kNone;
}; };
// Nothing is omitted. // Nothing is omitted.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment