Commit 37747f4a authored by Jungshik Shin, committed by Commit Bot

Add more entries to the confusability mapping

U+014B (ŋ) => n
U+1004 (င) => c
U+100c (ဌ) => g
U+1042 (၂) => j
U+1054 (ၔ) => e

Bug: 811117,808316
Test: components_unittests -gtest_filter=*IDN*
Change-Id: I29f73c48d665bd9070050bd7f0080563635b9c63
Reviewed-on: https://chromium-review.googlesource.com/919423
Reviewed-by: Peter Kasting <pkasting@chromium.org>
Commit-Queue: Jungshik Shin <jshin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#536955}
parent 848d697d
......@@ -152,34 +152,35 @@ IDNSpoofChecker::IDNSpoofChecker() {
// U+04C8 (ӈ), U+0527 (ԧ), U+0529 (ԩ)} => h
// - {U+0138 (ĸ), U+03BA (κ), U+043A (к), U+049B (қ), U+049D (ҝ),
// U+049F (ҟ), U+04A1(ҡ), U+04C4 (ӄ), U+051F (ԟ)} => k
// - {U+014B (ŋ), U+043F (п)} => n
// - {U+0167 (ŧ), U+0442 (т), U+04AD (ҭ)} => t
// - {U+0185 (ƅ), U+044C (ь), U+048D (ҍ), U+0432 (в)} => b
// - {U+03C9 (ω), U+0448 (ш), U+0449 (щ)} => w
// - {U+043C (м), U+04CE (ӎ)} => m
// - U+043F (п) => n
// - {U+0454 (є), U+04BD (ҽ), U+04BF (ҿ)} => e
// - {U+0454 (є), U+04BD (ҽ), U+04BF (ҿ), U+1054 (ၔ)} => e
// - U+0491 (ґ) => r
// - U+0493 (ғ) => f
// - U+04AB (ҫ) => c
// - {U+04AB (ҫ), U+1004 (င)} => c
// - U+04B1 (ұ) => y
// - {U+03C7 (χ), U+04B3 (ҳ), U+04FD (ӽ), U+04FF (ӿ)} => x
// - U+04CF (ӏ) => i (on Windows), l (elsewhere)
// - U+0503 (ԃ) => d
// - U+050D (ԍ) => g
// - {U+050D (ԍ), U+100c (ဌ)} => g
// - U+0D1F (ട) => s
// - U+1042 (၂) => j
extra_confusable_mapper_.reset(icu::Transliterator::createFromRules(
UNICODE_STRING_SIMPLE("ExtraConf"),
icu::UnicodeString::fromUTF8("[þϼҏ] > p; [ħнћңҥӈԧԩ] > h;"
"[ĸκкқҝҟҡӄԟ] > k; [ŧтҭ] > t;"
"[ĸκкқҝҟҡӄԟ] > k; [ŋп] > n; [ŧтҭ] > t;"
"[ƅьҍв] > b; [ωшщ] > w; [мӎ] > m;"
"п > n; [єҽҿ] > e; ґ > r; ғ > f; ҫ > c;"
"[єҽҿၔ] > e; ґ > r; ғ > f; [ҫင] > c;"
"ұ > y; [χҳӽӿ] > x;"
#if defined(OS_WIN)
"ӏ > i;"
#else
"ӏ > l;"
#endif
"ԃ > d; ԍ > g; ട > s"),
"ԃ > d; [ԍဌ] > g; ട > s; ၂ > j"),
UTRANS_FORWARD, parse_error, status));
DCHECK(U_SUCCESS(status))
<< "Spoofchecker initalization failed due to an error: "
......
......@@ -12,3 +12,4 @@ rf.com
cyxe.com
ldg.com
idg.com
cegjo.com
......@@ -22,4 +22,5 @@ rf.corn, 1
cyxe.corn, 1
ldg.corn, 1
idg.corn, 1
cegjo.corn, 1
%%
......@@ -478,6 +478,8 @@ const IDNTestCase idn_cases[] = {
{"xn--m1a4ne5jry.com", L"\x048f\x043d\x051f\x04ad\x048d.com", false},
// ҏнԟҭв.com
{"xn--b1av9v8dry.com", L"\x048f\x043d\x051f\x04ad\x0432.com", false},
// wmŋr.com
{"xn--wmr-jxa.com", L"wm\x014br.com", false},
// шмпґ.com
{"xn--l1agz80a.com", L"\x0448\x043c\x043f\x0491.com", false},
// щмпґ.com
......@@ -499,6 +501,9 @@ const IDNTestCase idn_cases[] = {
// ӏԃԍ.com
{"xn--s5a8h4a.com", L"\x04cf\x0503\x050d.com", false},
// ငၔဌ၂ဝ.com (entirely made of Myanmar characters)
{"xn--ridq5c9hnd.com", L"\x1004\x1054\x100c" L"\x1042\x101d.com", false},
// At one point the skeleton of 'w' was 'vv'; ensure that
// it's treated as 'w'.
{"xn--wder-qqa.com",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment