Commit de165b66 authored by meacer; committed by Commit Bot

IDN Display: Add whole-script-confusable characters for Greek

This CL adds a list of Greek characters that look like Latin
characters. A domain consisting only of these characters whose TLD is
neither the .gr ccTLD nor another Greek TLD such as .ελ will be
converted to punycode, as is done for other
whole-script-confusables such as Cyrillic and Hebrew.

Bug: 722167
Change-Id: Id0157503c605a6a73eb49147c26b48d5995d1ff2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1986209
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Reviewed-by: Joe DeBlasio <jdeblasio@chromium.org>
Cr-Commit-Position: refs/heads/master@{#728697}
parent 6bd113d3
......@@ -9,6 +9,7 @@
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/threading/thread_local_storage.h"
#include "build/build_config.h"
#include "net/base/lookup_string_in_fixed_set.h"
......@@ -209,12 +210,20 @@ IDNSpoofChecker::IDNSpoofChecker() {
// (Potential set: [ሀሁሃሠሡሰሱሲስበቡቢተቱቲታነከኩኪካኬክዐዑዕዖዘዙዚዛዝዞጠጡጢጣጦፐፒꬁꬂꬅ])
"[[:Ethi:]]",
"[ሀሠሰስበነተከዐዕዘጠፐꬅ]",
{"er", "et"}}};
{"er", "et"}},
{// Greek
"[[:Grek:]]",
// This ignores variants such as ά, έ, ή, ί.
"[αικνρυωηοτ]",
{"gr"}},
};
for (const WholeScriptConfusableData& data : kWholeScriptConfusables) {
auto all_letters = std::make_unique<icu::UnicodeSet>(
icu::UnicodeString::fromUTF8(data.script_regex), status);
DCHECK(U_SUCCESS(status));
auto latin_lookalikes = std::make_unique<icu::UnicodeSet>(
icu::UnicodeString::fromUTF8(data.latin_lookalike_letters), status);
DCHECK(U_SUCCESS(status));
auto script = std::make_unique<WholeScriptConfusable>(
std::move(all_letters), std::move(latin_lookalikes), data.allowed_tlds);
wholescriptconfusables_.push_back(std::move(script));
......
......@@ -100,7 +100,9 @@ class IDNSpoofChecker {
const std::vector<std::string>& allowed_tlds);
~WholeScriptConfusable();
// Captures all letters belonging to this script.
// Captures all letters belonging to this script. See kScriptNameCodeList in
// blink/renderer/platform/text/locale_to_script_mapping.cc for script
// codes.
std::unique_ptr<icu::UnicodeSet> all_letters;
// The subset of all_letters that look like Latin ASCII letters. A domain
// label entirely made of them is blocked as a simplified
......
......@@ -1207,6 +1207,11 @@ const IDNTestCase kIdnCases[] = {
// Whole-script-confusable in Ethiopic.
{"xn--6xd66aa62c.com", L"ሠዐዐፐ.com", kUnsafe},
{"xn--6xd66aa62c.et", L"ሠዐዐፐ.et", kSafe},
// Whole-script-confusable in Greek.
{"xn--mxapd.com", L"ικα.com", kUnsafe},
{"xn--mxapd.gr", L"ικα.gr", kSafe},
{"xn--mxapd.xn--qxam", L"ικα.ελ", kSafe},
};
namespace test {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment