Commit de9acc5c authored by Jungshik Shin, committed by Commit Bot

Add more to confusables list

U+04FB (ӻ) to f
U+050F (ԏ) to t
U+050B (ԋ) and U+0527 (ԧ) to h
U+0437 (з) and U+04E1 (ӡ) to 3

Add tests for the above entries and tests for ASCII-digit spoofing.

Bug: 816769,820068
Test: components_unittests --gtest_filter=*IDN*
Change-Id: I6cd0a7e97cd0ec2df522ce30f632acfd7b78eee2
Reviewed-on: https://chromium-review.googlesource.com/962875
Reviewed-by: Peter Kasting <pkasting@chromium.org>
Commit-Queue: Peter Kasting <pkasting@chromium.org>
Cr-Commit-Position: refs/heads/master@{#543600}
parent 1f2a366a
......@@ -155,17 +155,17 @@ IDNSpoofChecker::IDNSpoofChecker() {
// Supplement the Unicode confusable list by the following mapping.
// - {U+00FE (þ), U+03FC (ϼ), U+048F (ҏ)} => p
// - {U+0127 (ħ), U+043D (н), U+045B (ћ), U+04A3 (ң), U+04A5 (ҥ),
// U+04C8 (ӈ), U+04CA (ӊ), U+0527 (ԧ), U+0529 (ԩ)} => h
// U+04C8 (ӈ), U+04CA (ӊ), U+050B (ԋ), U+0527 (ԧ), U+0529 (ԩ)} => h
// - {U+0138 (ĸ), U+03BA (κ), U+043A (к), U+049B (қ), U+049D (ҝ),
// U+049F (ҟ), U+04A1 (ҡ), U+04C4 (ӄ), U+051F (ԟ)} => k
// - {U+014B (ŋ), U+043F (п)} => n
// - {U+0167 (ŧ), U+0442 (т), U+04AD (ҭ)} => t
// - {U+0167 (ŧ), U+0442 (т), U+04AD (ҭ), U+050F (ԏ)} => t
// - {U+0185 (ƅ), U+044C (ь), U+048D (ҍ), U+0432 (в)} => b
// - {U+03C9 (ω), U+0448 (ш), U+0449 (щ), U+0E1F (ฟ)} => w
// - {U+043C (м), U+04CE (ӎ)} => m
// - {U+0454 (є), U+04BD (ҽ), U+04BF (ҿ), U+1054 (ၔ)} => e
// - U+0491 (ґ) => r
// - U+0493 (ғ) => f
// - {U+0493 (ғ), U+04FB (ӻ)} => f
// - {U+04AB (ҫ), U+1004 (င)} => c
// - U+04B1 (ұ) => y
// - {U+03C7 (χ), U+04B3 (ҳ), U+04FD (ӽ), U+04FF (ӿ)} => x
......@@ -174,19 +174,21 @@ IDNSpoofChecker::IDNSpoofChecker() {
// - {U+050D (ԍ), U+100C (ဌ)} => g
// - {U+0D1F (ട), U+0E23 (ร)} => s
// - U+1042 (၂) => j
// - {U+0437 (з), U+04E1 (ӡ)} => 3
extra_confusable_mapper_.reset(icu::Transliterator::createFromRules(
UNICODE_STRING_SIMPLE("ExtraConf"),
icu::UnicodeString::fromUTF8("[þϼҏ] > p; [ħнћңҥӈӊԧԩ] > h;"
"[ĸκкқҝҟҡӄԟ] > k; [ŋп] > n; [ŧтҭ] > t;"
icu::UnicodeString::fromUTF8("[þϼҏ] > p; [ħнћңҥӈӊԋԧԩ] > h;"
"[ĸκкқҝҟҡӄԟ] > k; [ŋп] > n; [ŧтҭԏ] > t;"
"[ƅьҍв] > b; [ωшщฟ] > w; [мӎ] > m;"
"[єҽҿၔ] > e; ґ > r; ғ > f; [ҫင] > c;"
"[єҽҿၔ] > e; ґ > r; [ғӻ] > f; [ҫင] > c;"
"ұ > y; [χҳӽӿ] > x;"
#if defined(OS_WIN)
"ӏ > i;"
#else
"ӏ > l;"
#endif
"ԃ > d; [ԍဌ] > g; [ടร] > s; ၂ > j"),
"ԃ > d; [ԍဌ] > g; [ടร] > s; ၂ > j;"
"[зӡ] > 3"),
UTRANS_FORWARD, parse_error, status));
DCHECK(U_SUCCESS(status))
<< "Spoofchecker initalization failed due to an error: "
......
......@@ -14,3 +14,4 @@ ldg.com
idg.com
cegjo.com
wsws.com
1234567890.com
......@@ -24,4 +24,5 @@ ldg.corn, 1
idg.corn, 1
cegjo.corn, 1
wsws.corn, 1
l23456789O.corn, 1
%%
......@@ -423,6 +423,12 @@ const IDNTestCase idn_cases[] = {
{"xn--l1acpzs.com", L"\x0449\x043c\x043d\x0442\x044c.com", false},
// шмнтв.com
{"xn--b1atdu1a.com", L"\x0448\x043c\x043d\x0442\x0432.com", false},
// шмԋтв.com
{"xn--b1atsw09g.com", L"\x0448\x043c\x050b\x0442\x0432.com", false},
// шмԧтв.com
{"xn--b1atsw03i.com", L"\x0448\x043c\x0527\x0442\x0432.com", false},
// шмԋԏв.com
{"xn--b1at9a12dua.com", L"\x0448\x043c\x050b\x050f\x0432.com", false},
// ഠട345.com
{"xn--345-jtke.com",
L"\x0d20\x0d1f"
......@@ -490,6 +496,8 @@ const IDNTestCase idn_cases[] = {
{"xn--o1at1tsi.com", L"\x0449\x04ce\x043f\x0491.com", false},
// ґғ.com
{"xn--03ae.com", L"\x0491\x0493.com", false},
// ґӻ.com
{"xn--03a6s.com", L"\x0491\x04fb.com", false},
// ҫұҳҽ.com
{"xn--r4amg4b.com", L"\x04ab\x04b1\x04b3\x04bd.com", false},
// ҫұӽҽ.com
......@@ -503,6 +511,25 @@ const IDNTestCase idn_cases[] = {
// ӏԃԍ.com
{"xn--s5a8h4a.com", L"\x04cf\x0503\x050d.com", false},
// ꓲ2345б7890.com
{"xn--23457890-e7g93622b.com", L"\xa4f2" L"2345\x0431" L"7890.com", false},
// 1ᒿ345б7890.com
{"xn--13457890-e7g0943b.com", L"1\x14bf" L"345\x0431" L"7890.com", false},
// 12з4567890.com
{"xn--124567890-10h.com", L"12\x0437" L"4567890.com", false},
// 12ӡ4567890.com
{"xn--124567890-mfj.com", L"12\x04e1" L"4567890.com", false},
// 123Ꮞ567890.com
{"xn--123567890-dm4b.com", L"123\x13ce" L"567890.com", false},
// 12345б7890.com
{"xn--123457890-fzh.com", L"12345\x0431" L"7890.com", false},
// 1234567ȣ90.com
{"xn--123456790-6od.com", L"1234567\x0223" L"90.com", false},
// 12345678୨0.com
{"xn--123456780-71w.com", L"12345678\x0b68" L"0.com", false},
// 123456789ꓳ.com
{"xn--123456789-tx75a.com", L"123456789\xa4f3.com", false},
// ငၔဌ၂ဝ.com (entirely made of Myanmar characters)
{"xn--ridq5c9hnd.com", L"\x1004\x1054\x100c" L"\x1042\x101d.com", false},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment