Commit c626584f authored by meacer, committed by Commit Bot

Add whole-script-confusable characters for Hebrew

This CL adds a list of Hebrew characters that look like Latin characters. A
domain consisting only of these characters and not having the .il ccTLD
will be converted to punycode, as is done for Cyrillic Latin-lookalikes.

The impact of this change on existing domains is minimal: fewer than 5
domains are affected.

Bug: 722167
Change-Id: I0fd9c13331b10845958612f58fb6353d5fdb6c88
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1975054
Reviewed-by: Joe DeBlasio <jdeblasio@chromium.org>
Reviewed-by: Mustafa Emre Acer <meacer@chromium.org>
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#726475}
parent b887c52e
...@@ -197,6 +197,12 @@ IDNSpoofChecker::IDNSpoofChecker() { ...@@ -197,6 +197,12 @@ IDNSpoofChecker::IDNSpoofChecker() {
"[аысԁеԍһіюјӏорԗԛѕԝхуъЬҽпгѵѡ]", "[аысԁеԍһіюјӏорԗԛѕԝхуъЬҽпгѵѡ]",
// TLDs containing most of the Cyrillic domains. // TLDs containing most of the Cyrillic domains.
{"bg", "by", "kz", "pyc", "ru", "su", "ua", "uz"}}, {"bg", "by", "kz", "pyc", "ru", "su", "ua", "uz"}},
{// Hebrew
"[[:Hebr:]]",
"[דוחיןסװײ׳ﬦ]",
// TLDs containing most of the Hebrew domains.
{"il"}},
}; };
for (const WholeScriptConfusableData& data : kWholeScriptConfusables) { for (const WholeScriptConfusableData& data : kWholeScriptConfusables) {
auto all_letters = std::make_unique<icu::UnicodeSet>( auto all_letters = std::make_unique<icu::UnicodeSet>(
......
...@@ -1172,7 +1172,10 @@ const IDNTestCase kIdnCases[] = { ...@@ -1172,7 +1172,10 @@ const IDNTestCase kIdnCases[] = {
{"xn--googlecom-lg9q.com", L"google工com.com", false}, // (U+5DE5) {"xn--googlecom-lg9q.com", L"google工com.com", false}, // (U+5DE5)
{"xn--googlecom-g040a.com", L"google讠com.com", false}, // (U+8BA0) {"xn--googlecom-g040a.com", L"google讠com.com", false}, // (U+8BA0)
{"xn--googlecom-b85n.com", L"google丁com.com", false}, // (U+4E01) {"xn--googlecom-b85n.com", L"google丁com.com", false}, // (U+4E01)
}; // namespace
{"xn--7dbh4a.com", L"חסד.com", false},
{"xn--7dbh4a.il", L"חסד.il", true},
}; // namespace
namespace test { namespace test {
#include "components/url_formatter/spoof_checks/top_domains/test_domains-trie-inc.cc" #include "components/url_formatter/spoof_checks/top_domains/test_domains-trie-inc.cc"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment