Commit 1e9a4a24 authored by meacer, committed by Commit Bot

Restrict Latin Small Letter Eth (U+00F0) to Icelandic domains

crrev.com/c/1879992 restricted Latin Small Letter Thorn to Icelandic
domains. This CL does the same for Eth (ð) as it can be confused with
the characters "o" and "d" in some fonts.

This change affects fewer than 10 real-world domains with limited popularity.

Bug: 1017707, 929711
Change-Id: I037054530feb1d34e9243ef5da35cf431f3b80b5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1881344
Reviewed-by: Christopher Thompson <cthomp@chromium.org>
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#709580}
parent 6a4f45ee
......@@ -170,6 +170,14 @@ IDNSpoofChecker::IDNSpoofChecker() {
status);
lgc_letters_n_ascii_.freeze();
// Latin small letter thorn ("þ", U+00FE) can be used to spoof both b and p,
// and Latin small letter eth ("ð", U+00F0) can be used to spoof o and d.
// Both are used in modern Icelandic orthography, so they are allowed for the
// Icelandic ccTLD (.is) but blocked in any other TLD.
icelandic_characters_ =
icu::UnicodeSet(UNICODE_STRING_SIMPLE("[\\u00fe\\u00f0]"), status);
icelandic_characters_.freeze();
// Used for diacritics-removal before the skeleton calculation. Add
// "ł > l; ø > o; đ > d" that are not handled by "NFD; Nonspacing mark
// removal; NFC".
......@@ -284,13 +292,10 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
if (deviation_characters_.containsSome(label_string))
return false;
// Latin small letter thorn (U+00FE) can be used to spoof both b and p. It's
// used in modern Icelandic orthography, so allow it for the Icelandic ccTLD
// (.is) but block in any other TLD.
if (label_string.length() > 1 && label_string.indexOf("þ") != -1 &&
top_level_domain != ".is") {
// Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is).
if (label_string.length() > 1 && top_level_domain != ".is" &&
icelandic_characters_.containsSome(label_string))
return false;
}
// If there's no script mixing, the input is regarded as safe without any
// extra check unless it falls into one of three categories:
......
......@@ -102,6 +102,7 @@ class IDNSpoofChecker {
icu::UnicodeSet cyrillic_letters_;
icu::UnicodeSet cyrillic_letters_latin_alike_;
icu::UnicodeSet lgc_letters_n_ascii_;
icu::UnicodeSet icelandic_characters_;
std::unique_ptr<icu::Transliterator> diacritic_remover_;
std::unique_ptr<icu::Transliterator> extra_confusable_mapper_;
......
......@@ -1091,6 +1091,11 @@ const IDNTestCase kIdnCases[] = {
// U+05D7 can look like Latin n in many fonts.
{"xn--ceba.com", L"חח.com", false},
// U+00FE (þ) and U+00F0 (ð) are only allowed under the .is TLD.
{"xn--acdef-wva.com", L"aþcdef.com", false},
{"xn--mnpqr-jta.com", L"mnðpqr.com", false},
{"xn--acdef-wva.is", L"aþcdef.is", true},
{"xn--mnpqr-jta.is", L"mnðpqr.is", true},
}; // namespace
namespace test {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment