Commit c7c61c62 authored by Mustafa Emre Acer, committed by Commit Bot

IDN Display: Add whole-script-confusable characters for Myanmar

This CL adds a list of Myanmar characters that look like Latin
characters. A domain consisting only of these characters and not
having a .mm ccTLD or other Burmese TLDs will be
converted to punycode, as is done for other whole-script-confusables
such as Cyrillic and Hebrew.

No popular domain is affected by this change.

Bug: 722167
Change-Id: I8d099e716f79ef42ab3205fe590e5e9b9a1db7a4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2005869
Reviewed-by: Joe DeBlasio <jdeblasio@chromium.org>
Reviewed-by: Mustafa Emre Acer <meacer@chromium.org>
Commit-Queue: Joe DeBlasio <jdeblasio@chromium.org>
Cr-Commit-Position: refs/heads/master@{#733059}
parent df4ede0f
......@@ -243,6 +243,15 @@ IDNSpoofChecker::IDNSpoofChecker() {
"[[:Taml:]]", "[டப௦]"},
{// Telugu
"[[:Telu:]]", "[౦౧]"},
{// Myanmar. Shan digits (႐႑႕႖႗) are already blocked from mixing with
// other Myanmar characters. However, they can still be used to form
// WSC spoofs, so they are included here (they are encoded because macOS
// doesn't display them properly).
// U+104A (၊) and U+104B (။) are excluded as they are signs and are
// blocked.
"[[:Mymr:]]",
"[ခဂငထပဝ၀၂ၔၜ\u1090\u1091\u1095\u1096\u1097]",
{"mm"}},
};
for (const WholeScriptConfusableData& data : kWholeScriptConfusables) {
auto all_letters = std::make_unique<icu::UnicodeSet>(
......
......@@ -1219,10 +1219,19 @@ const IDNTestCase kIdnCases[] = {
{"xn--gpd3ag.com", L"ჽჿხ.com", kUnsafe},
{"xn--gpd3ag.ge", L"ჽჿხ.ge", kSafe},
{"xn--gpd3ag.xn--node", L"ჽჿხ.გე", kSafe},
// Hebrew.
// Hebrew:
{"xn--7dbh4a.com", L"חסד.com", kUnsafe},
{"xn--7dbh4a.il", L"חסד.il", kSafe},
{"xn--9dbq2a.xn--7dbh4a", L"קום.חסד", kSafe},
// Myanmar:
{"xn--oidbbf41a.com", L"င၀ဂခဂ.com", kUnsafe},
{"xn--oidbbf41a.mm", L"င၀ဂခဂ.mm", kSafe},
{"xn--oidbbf41a.xn--7idjb0f4ck", L"င၀ဂခဂ.မြန်မာ", kSafe},
// Myanmar Shan digits:
{"xn--rmdcmef.com", L"႐႑႕႖႗.com", kUnsafe},
{"xn--rmdcmef.mm", L"႐႑႕႖႗.mm", kSafe},
{"xn--rmdcmef.xn--7idjb0f4ck", L"႐႑႕႖႗.မြန်မာ", kSafe},
// Indic scripts:
// Bengali:
{"xn--07baub.com", L"০৭০৭.com", kUnsafe},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment