Commit ff1ccf50 authored by Rob Buis, committed by Commit Bot

[mathml] Add helper functionality for operator dictionary

Add a method to find the category for a given (String, form) pair as
described in the spec [1] as well as unit tests.

[1] https://mathml-refresh.github.io/mathml-core/#operator-dictionary

Bug: 6606
Change-Id: I87aceb73720bf0e023d11767cde823ec4e04f185
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2375207
Reviewed-by: Dominik Röttsches <drott@chromium.org>
Reviewed-by: Frédéric Wang <fwang@igalia.com>
Commit-Queue: Rob Buis <rbuis@igalia.com>
Cr-Commit-Position: refs/heads/master@{#801775}
parent bd54133f
...@@ -1381,6 +1381,8 @@ component("platform") { ...@@ -1381,6 +1381,8 @@ component("platform") {
"text/locale_win.cc", "text/locale_win.cc",
"text/locale_win.h", "text/locale_win.h",
"text/mac/hyphenation_mac.cc", "text/mac/hyphenation_mac.cc",
"text/mathml_operator_dictionary.cc",
"text/mathml_operator_dictionary.h",
"text/platform_locale.cc", "text/platform_locale.cc",
"text/platform_locale.h", "text/platform_locale.h",
"text/segmented_string.cc", "text/segmented_string.cc",
...@@ -2000,6 +2002,7 @@ source_set("blink_platform_unittests_sources") { ...@@ -2000,6 +2002,7 @@ source_set("blink_platform_unittests_sources") {
"text/hyphenation_test.cc", "text/hyphenation_test.cc",
"text/icu_error_test.cc", "text/icu_error_test.cc",
"text/layout_locale_test.cc", "text/layout_locale_test.cc",
"text/mathml_operator_dictionary_test.cc",
"text/platform_locale_test.cc", "text/platform_locale_test.cc",
"text/segmented_string_test.cc", "text/segmented_string_test.cc",
"text/suffix_tree_test.cc", "text/suffix_tree_test.cc",
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/platform/text/mathml_operator_dictionary.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
namespace blink {
namespace {
// Operators made of two ASCII characters, see
// https://mathml-refresh.github.io/mathml-core/#operator-dictionary-compact-special-tables
// The list is kept in ascending order: FindCategory() locates an operator
// with std::lower_bound and adds the index of the match to
// kCombiningMinusSignBelow to obtain its dictionary key.
const char* operators_2_ascii_chars[] = {
    "!!", "!=",        // indices 0-1
    "&&",              // index 2
    "**", "*=",        // indices 3-4
    "++", "+=",        // indices 5-6
    "--", "-=", "->",  // indices 7-9
    "..",              // index 10
    "//", "/=",        // indices 11-12
    ":=",              // index 13
    "<=", "<>",        // indices 14-15
    "==",              // index 16
    ">=",              // index 17
    "||",              // index 18
};
// https://mathml-refresh.github.io/mathml-core/#operator-dictionary-categories-hexa-table
//
// One row of the compact operator dictionary: a packed 16-bit |entry| plus
// the length of the run of consecutive keys that share its category.
struct EntryRange {
  // Bits 0-13 form the lookup key (see ExtractKey), bits 12-15 form the
  // category nibble (see ExtractCategory). Bits 12-13 belong to both.
  uint16_t entry;
  // Number of keys directly following this entry's key that are covered by
  // the same row. Declared with a 16-bit underlying type so that the row does
  // not get padded to the alignment of |unsigned|.
  uint16_t range_bounds_delta : 4;
};
static_assert(sizeof(EntryRange) == 2 * sizeof(uint16_t),
              "EntryRange is expected to occupy two 16-bit units");

// Returns the 14 low bits of |range.entry|, i.e. the value rows are compared
// against when the dictionary is searched.
constexpr uint16_t ExtractKey(const EntryRange& range) {
  return range.entry & 0x3FFF;
}

// Returns the 4 high bits of |range.entry|, i.e. the nibble that is
// translated into a MathMLOperatorDictionaryCategory.
constexpr uint16_t ExtractCategory(const EntryRange& range) {
  return range.entry >> 12;
}
// The following representation is taken from the spec, and reduces storage
// requirements by mapping codepoints and category to better make use of the
// available bytes. For details see
// https://mathml-refresh.github.io/mathml-core/#operator-dictionary.
//
// Each row is {entry, range_bounds_delta}, consumed by ExtractKey(),
// ExtractCategory() and FindCategory() as follows:
// - entry & 0x3FFF is the lookup key. Its low 12 bits hold the code point
//   (FindCategory() stores U+2000..U+2BFF shifted down by 0x1C00) and bits
//   12-13 hold the form (infix=00, prefix=01, postfix=10).
// - entry >> 12 is the nibble FindCategory() translates into a
//   MathMLOperatorDictionaryCategory.
// - range_bounds_delta is the number of keys directly after the row's key
//   that are covered by the same row.
// Rows must stay sorted by ExtractKey(): FindCategory() searches this table
// with std::upper_bound.
static const EntryRange compact_dictionary[] = {
{0x8025, 0}, {0x802A, 0}, {0x402B, 0}, {0x402D, 0}, {0x802E, 0},
{0x402F, 0}, {0x803F, 1}, {0xC05C, 0}, {0x805E, 1}, {0x807C, 0},
{0x40B1, 0}, {0x80B7, 0}, {0x80D7, 0}, {0x40F7, 0}, {0x4322, 0},
{0x8323, 0}, {0x832B, 0}, {0x832F, 0}, {0x8332, 0}, {0x8422, 0},
{0x8443, 0}, {0x4444, 0}, {0xC461, 3}, {0x0590, 9}, {0x059C, 15},
{0x05AC, 1}, {0x05AF, 6}, {0x05B9, 0}, {0x05BC, 15}, {0x05CC, 0},
{0x05D0, 13}, {0x05E0, 15}, {0x05F0, 0}, {0x05F3, 0}, {0x05F5, 1},
{0x05FD, 2}, {0x8606, 0}, {0x860E, 0}, {0x4612, 4}, {0x8617, 0},
{0x4618, 0}, {0x4624, 0}, {0x4627, 3}, {0x4636, 0}, {0x4638, 0},
{0x863F, 1}, {0x468C, 3}, {0x4693, 3}, {0x8697, 0}, {0x4698, 0},
{0x8699, 0}, {0x469D, 2}, {0x86A0, 1}, {0x46BB, 2}, {0x46C4, 0},
{0x86C5, 0}, {0x46C6, 0}, {0x86C7, 0}, {0x86C9, 3}, {0x46CE, 1},
{0x46D2, 1}, {0x8705, 1}, {0x89A0, 1}, {0x89AA, 1}, {0x89AD, 4},
{0x4B95, 2}, {0x8BCB, 0}, {0x8BCD, 0}, {0x0BF0, 1}, {0x4BF4, 0},
{0x0BF5, 10}, {0x0D0A, 6}, {0x0D12, 1}, {0x0D21, 1}, {0x0D4E, 15},
{0x0D5E, 3}, {0x0D6E, 1}, {0x8D81, 1}, {0x8D99, 1}, {0x8DB5, 0},
{0x4DBC, 0}, {0x8DC2, 1}, {0x8DC9, 4}, {0x8DD8, 1}, {0x8DDB, 0},
{0x8DDF, 1}, {0x8DE2, 0}, {0x8DE7, 6}, {0x4DF6, 0}, {0x8DF8, 3},
{0x8E1D, 4}, {0x4E22, 12}, {0x8E2F, 8}, {0x4E38, 2}, {0x8E3B, 2},
{0x8E3F, 0}, {0x4E40, 15}, {0x8E50, 0}, {0x4E51, 15}, {0x4E61, 2},
{0x4EDA, 1}, {0x8EDC, 1}, {0x4EFB, 0}, {0x4EFD, 0}, {0x8EFE, 0},
{0x4F32, 0}, {0x0F45, 1}, {0x1021, 0}, {0x5028, 0}, {0x102B, 0},
{0x102D, 0}, {0x505B, 0}, {0x507B, 1}, {0x10AC, 0}, {0x10B1, 0},
{0x1332, 0}, {0x5416, 0}, {0x1418, 0}, {0x141C, 0}, {0x1600, 1},
{0x1603, 1}, {0x1607, 0}, {0xD60F, 2}, {0x1612, 1}, {0x161F, 3},
{0x962B, 8}, {0x163C, 0}, {0x16BE, 1}, {0xD6C0, 3}, {0x5708, 0},
{0x570A, 0}, {0x1710, 0}, {0x1719, 0}, {0x5729, 0}, {0x5B72, 0},
{0x1B95, 1}, {0x1BC0, 0}, {0x5BE6, 0}, {0x5BE8, 0}, {0x5BEA, 0},
{0x5BEC, 0}, {0x5BEE, 0}, {0x5D80, 0}, {0x5D83, 0}, {0x5D85, 0},
{0x5D87, 0}, {0x5D89, 0}, {0x5D8B, 0}, {0x5D8D, 0}, {0x5D8F, 0},
{0x5D91, 0}, {0x5D93, 0}, {0x5D95, 0}, {0x5D97, 0}, {0x1D9B, 15},
{0x1DAB, 4}, {0x5DFC, 0}, {0xDE00, 10}, {0x9E0B, 15}, {0x9E1B, 1},
{0x1EEC, 1}, {0xDEFC, 0}, {0xDEFF, 0}, {0x2021, 1}, {0x2026, 1},
{0x6029, 0}, {0x605D, 0}, {0xA05E, 1}, {0x2060, 0}, {0x607C, 1},
{0xA07E, 0}, {0x20A8, 0}, {0xA0AF, 0}, {0x20B0, 0}, {0x20B2, 2},
{0x20B8, 1}, {0xA2C6, 1}, {0xA2C9, 0}, {0x22CA, 1}, {0xA2CD, 0},
{0x22D8, 2}, {0xA2DC, 0}, {0x22DD, 0}, {0xA2F7, 0}, {0xA302, 0},
{0x2311, 0}, {0x2320, 0}, {0x2325, 0}, {0x2327, 0}, {0x232A, 0},
{0x2332, 0}, {0x6416, 0}, {0x2419, 2}, {0x241D, 2}, {0x2432, 5},
{0xA43E, 0}, {0x2457, 0}, {0x24DB, 1}, {0x6709, 0}, {0x670B, 0},
{0xA722, 1}, {0x672A, 0}, {0xA7B4, 1}, {0x27CD, 0}, {0xA7DC, 5},
{0x6B73, 0}, {0x6BE7, 0}, {0x6BE9, 0}, {0x6BEB, 0}, {0x6BED, 0},
{0x6BEF, 0}, {0x6D80, 0}, {0x6D84, 0}, {0x6D86, 0}, {0x6D88, 0},
{0x6D8A, 0}, {0x6D8C, 0}, {0x6D8E, 0}, {0x6D90, 0}, {0x6D92, 0},
{0x6D94, 0}, {0x6D96, 0}, {0x6D98, 0}, {0x6DFD, 0}};
} // namespace
// Returns the operator dictionary category for the pair (|content|, |form|),
// see https://mathml-refresh.github.io/mathml-core/#operator-dictionary.
// |content| must be a 16-bit string. The lookup runs in four stages:
//   1. reduce |content| to one 16-bit key, or return early,
//   2. answer the categories that are not stored in |compact_dictionary|,
//   3. fold the key and |form| into the 14-bit dictionary key,
//   4. search |compact_dictionary| and translate the stored nibble.
// kNone is returned whenever no dictionary row applies.
MathMLOperatorDictionaryCategory FindCategory(
    const String& content,
    MathMLOperatorDictionaryForm form) {
  using Category = MathMLOperatorDictionaryCategory;
  using Form = MathMLOperatorDictionaryForm;

  DCHECK(!content.Is8Bit());

  // Stage 1: derive the key. It stays zero when |content| cannot match.
  uint16_t key = 0;
  switch (content.length()) {
    case 1: {
      const UChar32 single = content[0];
      // The block U+0320..U+03FF is where the 2-ASCII-chars operators are
      // mapped below, so a lone character from that block is not accepted.
      const bool in_remapped_block =
          kCombiningMinusSignBelow <= single &&
          single <= kGreekCapitalReversedDottedLunateSigmaSymbol;
      if (!in_remapped_block)
        key = single;
      break;
    }
    case 2: {
      const UChar32 first_code_point = content.CharacterStartingAt(0);
      if (first_code_point ==
              kArabicMathematicalOperatorMeemWithHahWithTatweel ||
          first_code_point == kArabicMathematicalOperatorHahWithDal) {
        // The non-BMP Arabic operators only have a postfix entry.
        return form == Form::kPostfix ? Category::kI : Category::kNone;
      }

      const auto second_unit = content[1];
      if (second_unit == kCombiningLongSolidusOverlay ||
          second_unit == kCombiningLongVerticalLineOverlay) {
        // A trailing COMBINING LONG SOLIDUS OVERLAY or COMBINING LONG
        // VERTICAL LINE OVERLAY is ignored: the first character decides.
        key = content[0];
        break;
      }

      // Otherwise |content| has to be one of the 2-ASCII-chars operators,
      // which are looked up with a binary search.
      const char** const table_end =
          operators_2_ascii_chars + base::size(operators_2_ascii_chars);
      const char** const match = std::lower_bound(
          operators_2_ascii_chars, table_end, content,
          [](const char* candidate, const String& needle) -> bool {
            if (candidate[0] != needle[0])
              return candidate[0] < needle[0];
            return candidate[1] < needle[1];
          });
      if (match != table_end && content == *match)
        key = kCombiningMinusSignBelow + (match - operators_2_ascii_chars);
      break;
    }
    default:
      break;
  }
  if (key == 0)
    return Category::kNone;

  // Stage 2: categories that are not encoded in the compact dictionary.
  // https://mathml-refresh.github.io/mathml-core/#operator-dictionary-categories-values
  if (form == Form::kPrefix) {
    const bool is_double_struck_d =
        kDoubleStruckItalicCapitalDCharacter <= key &&
        key <= kDoubleStruckItalicSmallDCharacter;
    const bool is_root =
        kSquareRootCharacter <= key && key <= kFourthRootCharacter;
    if (is_double_struck_d || key == kPartialDifferential || is_root)
      return Category::kK;
  }
  if (form == Form::kInfix &&
      (key == kComma || key == kColon || key == kSemiColon)) {
    return Category::kM;
  }

  // Stage 3: build the key used by the compact dictionary. U+2000..U+2BFF is
  // moved to 0x0400..0x0FFF by subtracting (U+2000 - U+0400) == 0x1C00 so the
  // code point fits into 12 bits; anything else above U+03FF has no entry.
  const bool in_shifted_block =
      kEnQuadCharacter <= key && key <= kHellschreiberPauseSymbol;
  if (!in_shifted_block && key > kGreekCapitalReversedDottedLunateSigmaSymbol)
    return Category::kNone;
  if (in_shifted_block)
    key -= 0x1C00;

  // The form occupies two more bits (infix=00, prefix=01, postfix=10).
  if (form == Form::kPrefix)
    key |= 0x1000;
  else if (form == Form::kPostfix)
    key |= 0x2000;
  DCHECK_LE(key, 0x2FFF);

  // Stage 4: find the last row whose key is not greater than |key|.
  const EntryRange* const dictionary_end =
      compact_dictionary + base::size(compact_dictionary);
  const EntryRange* row = std::upper_bound(
      compact_dictionary, dictionary_end, key,
      [](uint16_t needle, EntryRange candidate) -> bool {
        return needle < ExtractKey(candidate);
      });
  if (row == compact_dictionary)
    return Category::kNone;
  --row;
  DCHECK_LE(ExtractKey(*row), key);

  // The row only applies if |key| lies within the run it describes.
  const auto last_key_in_row = ExtractKey(*row) + row->range_bounds_delta;
  if (key > last_key_in_row)
    return Category::kNone;

  // A row matched: translate its nibble into a category.
  // https://mathml-refresh.github.io/mathml-core/#operator-dictionary-categories-values
  switch (ExtractCategory(*row)) {
    case 0x0:
      return Category::kA;
    case 0x4:
      return Category::kB;
    case 0x8:
      return Category::kC;
    case 0xC:  // infix
    case 0x1:  // prefix
    case 0x2:  // postfix
      return Category::kDorEorL;
    case 0x5:  // prefix
    case 0x6:  // postfix
      return Category::kForG;
    case 0x9:
      return Category::kH;
    case 0xA:
      return Category::kI;
    case 0xD:
      return Category::kJ;
  }
  NOTREACHED();
  return Category::kNone;
}
} // namespace blink
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_MATHML_OPERATOR_DICTIONARY_H_
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_MATHML_OPERATOR_DICTIONARY_H_
#include "third_party/blink/renderer/platform/platform_export.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
namespace blink {
// Result of an operator dictionary lookup. Some values stand for several
// categories at once (kDorEorL, kForG), since FindCategory() reports those
// categories with a single value.
enum class MathMLOperatorDictionaryCategory : uint8_t {
  // No dictionary entry matches the (content, form) pair.
  kNone = 0,
  kA = 1,
  kB = 2,
  kC = 3,
  kDorEorL = 4,
  kForG = 5,
  kH = 6,
  kI = 7,
  kJ = 8,
  kK = 9,
  kM = 10,
  // Never returned by FindCategory(); presumably a "not computed yet" marker
  // for callers - TODO confirm against the users of this enum.
  kUndefined = 15,
};
// Form of an operator; together with the operator's text it selects the
// dictionary entry looked up by FindCategory().
enum MathMLOperatorDictionaryForm {
  kInfix = 0,
  kPrefix = 1,
  kPostfix = 2,
};
// Returns the operator dictionary category of the pair (|content|, |form|),
// where |form| is infix, prefix or postfix, as described in
// https://mathml-refresh.github.io/mathml-core/#operator-dictionary
// |content| must be a 16-bit (UTF-16) string; the implementation DCHECKs this.
// MathMLOperatorDictionaryCategory::kNone is returned when nothing matches.
PLATFORM_EXPORT MathMLOperatorDictionaryCategory FindCategory(
    const String& content,
    MathMLOperatorDictionaryForm form);
} // namespace blink
#endif  // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_MATHML_OPERATOR_DICTIONARY_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/platform/text/mathml_operator_dictionary.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
namespace blink {
// Code points for which the Infix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kA.
// The values must be in ascending order because IsInCategory() performs a
// std::binary_search on this table. A duplicate 0x2952 that used to sit
// between 0x27FF and 0x290A violated that ordering and was dropped; 0x2952 is
// still listed at its sorted position.
static const UChar32 category_a[]{
    0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198,
    0x2199, 0x219C, 0x219D, 0x219E, 0x219F, 0x21A0, 0x21A1, 0x21A2, 0x21A3,
    0x21A4, 0x21A5, 0x21A6, 0x21A7, 0x21A8, 0x21A9, 0x21AA, 0x21AB, 0x21AC,
    0x21BC, 0x21BD, 0x21BE, 0x21BF, 0x21C0, 0x21C1, 0x21C2, 0x21C3, 0x21C4,
    0x21C5, 0x21C6, 0x21C7, 0x21C8, 0x21C9, 0x21CA, 0x21CB, 0x21CC, 0x21D0,
    0x21D1, 0x21D2, 0x21D3, 0x21D4, 0x21D5, 0x21D6, 0x21D7, 0x21D8, 0x21D9,
    0x21DA, 0x21DB, 0x21DC, 0x21DD, 0x21E0, 0x21E1, 0x21E2, 0x21E3, 0x21E4,
    0x21E5, 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0x21EC, 0x21ED,
    0x21EE, 0x21EF, 0x21F0, 0x21F3, 0x21F5, 0x21F6, 0x21FD, 0x21FE, 0x21FF,
    0x27F0, 0x27F1, 0x27F5, 0x27F6, 0x27F7, 0x27F8, 0x27F9, 0x27FA, 0x27FB,
    0x27FC, 0x27FD, 0x27FE, 0x27FF, 0x290A, 0x290B, 0x290C, 0x290D,
    0x290E, 0x290F, 0x2910, 0x2912, 0x2913, 0x2921, 0x2922, 0x294E, 0x294F,
    0x2950, 0x2951, 0x2952, 0x2953, 0x2954, 0x2955, 0x2956, 0x2957, 0x2958,
    0x2959, 0x295A, 0x295B, 0x295C, 0x295D, 0x295E, 0x295F, 0x2960, 0x2961,
    0x296E, 0x296F, 0x2B45, 0x2B46};
// Code points for which the Infix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kB.
// Keep the values in ascending order: IsInCategory() does a binary search.
// Note that values inside U+0320..U+03FF (0x0322 here) are never consulted,
// because the test expects kNone for that whole range before checking tables.
static const UChar32 category_b[]{
0x002B, 0x002D, 0x002F, 0x00B1, 0x00F7, 0x0322, 0x2044, 0x2212, 0x2213,
0x2214, 0x2215, 0x2216, 0x2218, 0x2224, 0x2227, 0x2228, 0x2229, 0x222A,
0x2236, 0x2238, 0x228C, 0x228D, 0x228E, 0x228F, 0x2293, 0x2294, 0x2295,
0x2296, 0x2298, 0x229D, 0x229E, 0x229F, 0x22BB, 0x22BC, 0x22BD, 0x22C4,
0x22C6, 0x22CE, 0x22CF, 0x22D2, 0x22D3, 0x2795, 0x2796, 0x2797, 0x27F4,
0x29BC, 0x29F6, 0x2A22, 0x2A23, 0x2A24, 0x2A25, 0x2A26, 0x2A27, 0x2A28,
0x2A29, 0x2A2A, 0x2A2B, 0x2A2C, 0x2A2D, 0x2A2E, 0x2A38, 0x2A39, 0x2A3A,
0x2A40, 0x2A41, 0x2A42, 0x2A43, 0x2A44, 0x2A45, 0x2A46, 0x2A47, 0x2A48,
0x2A49, 0x2A4A, 0x2A4B, 0x2A4C, 0x2A4D, 0x2A4E, 0x2A4F, 0x2A51, 0x2A52,
0x2A53, 0x2A54, 0x2A55, 0x2A56, 0x2A57, 0x2A58, 0x2A59, 0x2A5A, 0x2A5B,
0x2A5C, 0x2A5D, 0x2A5E, 0x2A5F, 0x2A60, 0x2A61, 0x2A62, 0x2A63, 0x2ADA,
0x2ADB, 0x2AFB, 0x2AFD, 0x2B32,
};
// Code points for which the Infix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kC.
// Keep the values in ascending order: IsInCategory() does a binary search.
// Values inside U+0320..U+03FF (0x0323, 0x032B, 0x032F, 0x0332) are never
// consulted, because the test expects kNone for that whole range first.
static const UChar32 category_c[]{
0x0025, 0x002A, 0x002E, 0x003F, 0x0040, 0x005E, 0x005F, 0x007C, 0x00B7,
0x00D7, 0x0323, 0x032B, 0x032F, 0x0332, 0x2022, 0x2043, 0x2206, 0x220E,
0x2217, 0x223F, 0x2240, 0x2297, 0x2299, 0x22A0, 0x22A1, 0x22C5, 0x22C7,
0x22C9, 0x22CA, 0x22CB, 0x22CC, 0x2305, 0x2306, 0x25A0, 0x25A1, 0x25AA,
0x25AB, 0x25AD, 0x25AE, 0x25AF, 0x25B0, 0x25B1, 0x27CB, 0x27CD, 0x2981,
0x2982, 0x2999, 0x299A, 0x29B5, 0x29C2, 0x29C3, 0x29C9, 0x29CA, 0x29CB,
0x29CC, 0x29CD, 0x29D8, 0x29D9, 0x29DB, 0x29DF, 0x29E0, 0x29E2, 0x29E7,
0x29E8, 0x29E9, 0x29EA, 0x29EB, 0x29EC, 0x29ED, 0x29F8, 0x29F9, 0x29FA,
0x29FB, 0x2A1D, 0x2A1E, 0x2A1F, 0x2A20, 0x2A21, 0x2A2F, 0x2A30, 0x2A31,
0x2A32, 0x2A33, 0x2A34, 0x2A35, 0x2A36, 0x2A37, 0x2A3B, 0x2A3C, 0x2A3D,
0x2A3F, 0x2A50, 0x2ADC, 0x2ADD, 0x2AFE,
};
// Code points for which the Prefix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kDorEorL.
// Keep the values in ascending order: IsInCategory() does a binary search.
// 0x0332 lies inside U+0320..U+03FF and is never consulted, because the test
// expects kNone for that whole range first.
static const UChar32 category_d[]{
0x0021, 0x002B, 0x002D, 0x00AC, 0x00B1, 0x0332, 0x2018, 0x201C, 0x2200,
0x2201, 0x2203, 0x2204, 0x2207, 0x2212, 0x2213, 0x221F, 0x2220, 0x2221,
0x2222, 0x223C, 0x22BE, 0x22BF, 0x2310, 0x2319, 0x2795, 0x2796, 0x27C0,
0x299B, 0x299C, 0x299D, 0x299E, 0x299F, 0x29A0, 0x29A1, 0x29A2, 0x29A3,
0x29A4, 0x29A5, 0x29A6, 0x29A7, 0x29A8, 0x29A9, 0x29AA, 0x29AB, 0x29AC,
0x29AD, 0x29AE, 0x29AF, 0x2AEC, 0x2AED,
};
// Code points for which the Postfix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kDorEorL.
// Keep the values in ascending order: IsInCategory() does a binary search.
// Values inside U+0320..U+03FF (0x0320, 0x0325, 0x0327, 0x032A, 0x0332) are
// never consulted, because the test expects kNone for that whole range first.
static const UChar32 category_e[]{
0x0021, 0x0022, 0x0026, 0x0027, 0x0060, 0x00A8, 0x00B0, 0x00B2,
0x00B3, 0x00B4, 0x00B8, 0x00B9, 0x02CA, 0x02CB, 0x02D8, 0x02D9,
0x02DA, 0x02DD, 0x0311, 0x0320, 0x0325, 0x0327, 0x032A, 0x0332,
0x2019, 0x201A, 0x201B, 0x201D, 0x201E, 0x201F, 0x2032, 0x2033,
0x2034, 0x2035, 0x2036, 0x2037, 0x2057, 0x20DB, 0x20DC, 0x23CD,
};
// Code points for which the Prefix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kForG.
// Keep the values in ascending order: IsInCategory() does a binary search.
static const UChar32 category_f[]{
0x0028, 0x005B, 0x007B, 0x007C, 0x2016, 0x2308, 0x230A, 0x2329, 0x2772,
0x27E6, 0x27E8, 0x27EA, 0x27EC, 0x27EE, 0x2980, 0x2983, 0x2985, 0x2987,
0x2989, 0x298B, 0x298D, 0x298F, 0x2991, 0x2993, 0x2995, 0x2997, 0x29FC,
};
// Code points for which the Postfix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kForG.
// Keep the values in ascending order: IsInCategory() does a binary search.
static const UChar32 category_g[]{
0x0029, 0x005D, 0x007C, 0x007D, 0x2016, 0x2309, 0x230B, 0x232A, 0x2773,
0x27E7, 0x27E9, 0x27EB, 0x27ED, 0x27EF, 0x2980, 0x2984, 0x2986, 0x2988,
0x298A, 0x298C, 0x298E, 0x2990, 0x2992, 0x2994, 0x2996, 0x2998, 0x29FD,
};
// Code points for which the Prefix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kH.
// Keep the values in ascending order: IsInCategory() does a binary search.
static const UChar32 category_h[]{
0x222B, 0x222C, 0x222D, 0x222E, 0x222F, 0x2230, 0x2231, 0x2232, 0x2233,
0x2A0B, 0x2A0C, 0x2A0D, 0x2A0E, 0x2A0F, 0x2A10, 0x2A11, 0x2A12, 0x2A13,
0x2A14, 0x2A15, 0x2A16, 0x2A17, 0x2A18, 0x2A19, 0x2A1A, 0x2A1B, 0x2A1C};
// Code points for which the Postfix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kI. The two non-BMP Arabic operators that
// also map to kI are checked separately by the test and are not listed here.
// Keep the values in ascending order: IsInCategory() does a binary search.
static const UChar32 category_i[]{
0x005E, 0x005F, 0x007E, 0x00AF, 0x02C6, 0x02C7, 0x02C9, 0x02CD,
0x02DC, 0x02F7, 0x0302, 0x203E, 0x2322, 0x2323, 0x23B4, 0x23B5,
0x23DC, 0x23DD, 0x23DE, 0x23DF, 0x23E0, 0x23E1,
};
// Code points for which the Prefix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kJ.
// Keep the values in ascending order: IsInCategory() does a binary search.
static const UChar32 category_j[]{
0x220F, 0x2210, 0x2211, 0x22C0, 0x22C1, 0x22C2, 0x22C3,
0x2A00, 0x2A01, 0x2A02, 0x2A03, 0x2A04, 0x2A05, 0x2A06,
0x2A07, 0x2A08, 0x2A09, 0x2A0A, 0x2AFC, 0x2AFF,
};
// Code points for which the Prefix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kK.
// Keep the values in ascending order: IsInCategory() does a binary search.
static const UChar32 category_k[]{
0x2145, 0x2146, 0x2202, 0x221A, 0x221B, 0x221C,
};
// Code points for which the Infix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kDorEorL.
// Keep the values in ascending order: IsInCategory() does a binary search.
static const UChar32 category_l[]{
0x005C, 0x2061, 0x2062, 0x2063, 0x2064,
};
// Code points for which the Infix test expects FindCategory() to return
// MathMLOperatorDictionaryCategory::kM (comma, colon and semicolon).
// Keep the values in ascending order: IsInCategory() does a binary search.
static const UChar32 category_m[]{
0x002C,
0x003A,
0x003B,
};
// Returns true when |character| is one of the N elements of |table|.
// |table| has to be sorted in ascending order, since the lookup is a binary
// search.
template <typename T, size_t N>
bool IsInCategory(const T (&table)[N], UChar32 character) {
  const T* const first = table;
  const T* const past_last = table + N;
  return std::binary_search(first, past_last, character);
}
// Builds a String whose only content is the code point |c|.
String FromUChar32(UChar32 c) {
  StringBuilder builder;
  builder.Append(c);
  return builder.ToString();
}
// Walks the code points below kMaxCodepoint and checks the infix category of
// each one that is either in the U+0320..U+03FF block (always kNone) or
// listed in one of the infix tables. Code points matching neither are built
// into a string but not checked.
TEST(MathOperatorDictionaryTest, Infix) {
  for (UChar32 ch = 0; ch < kMaxCodepoint; ch++) {
    String s = FromUChar32(ch);
    s.Ensure16Bit();

    auto expected = MathMLOperatorDictionaryCategory::kNone;
    if (ch >= kCombiningMinusSignBelow &&
        ch <= kGreekCapitalReversedDottedLunateSigmaSymbol) {
      // Single characters from this block never have a category.
      expected = MathMLOperatorDictionaryCategory::kNone;
    } else if (IsInCategory(category_a, ch)) {
      expected = MathMLOperatorDictionaryCategory::kA;
    } else if (IsInCategory(category_b, ch)) {
      expected = MathMLOperatorDictionaryCategory::kB;
    } else if (IsInCategory(category_c, ch)) {
      expected = MathMLOperatorDictionaryCategory::kC;
    } else if (IsInCategory(category_l, ch)) {
      expected = MathMLOperatorDictionaryCategory::kDorEorL;
    } else if (IsInCategory(category_m, ch)) {
      expected = MathMLOperatorDictionaryCategory::kM;
    } else {
      // No expectation is recorded for the remaining code points.
      continue;
    }

    EXPECT_TRUE(FindCategory(s, MathMLOperatorDictionaryForm::kInfix) ==
                expected);
  }
}
// Walks the code points below kMaxCodepoint and checks the prefix category of
// every one of them: kNone for the U+0320..U+03FF block, the category of the
// first prefix table that lists the code point, and kNone otherwise.
TEST(MathOperatorDictionaryTest, Prefix) {
  for (UChar32 ch = 0; ch < kMaxCodepoint; ch++) {
    String s = FromUChar32(ch);
    s.Ensure16Bit();

    auto expected = MathMLOperatorDictionaryCategory::kNone;
    const bool in_remapped_block =
        ch >= kCombiningMinusSignBelow &&
        ch <= kGreekCapitalReversedDottedLunateSigmaSymbol;
    if (!in_remapped_block) {
      if (IsInCategory(category_d, ch)) {
        expected = MathMLOperatorDictionaryCategory::kDorEorL;
      } else if (IsInCategory(category_f, ch)) {
        expected = MathMLOperatorDictionaryCategory::kForG;
      } else if (IsInCategory(category_h, ch)) {
        expected = MathMLOperatorDictionaryCategory::kH;
      } else if (IsInCategory(category_j, ch)) {
        expected = MathMLOperatorDictionaryCategory::kJ;
      } else if (IsInCategory(category_k, ch)) {
        expected = MathMLOperatorDictionaryCategory::kK;
      }
    }

    EXPECT_TRUE(FindCategory(s, MathMLOperatorDictionaryForm::kPrefix) ==
                expected);
  }
}
// Walks the code points below kMaxCodepoint and checks the postfix category
// of every one of them: kNone for the U+0320..U+03FF block, kI for the two
// non-BMP Arabic operators, the category of the first postfix table that
// lists the code point, and kNone otherwise.
TEST(MathOperatorDictionaryTest, Postfix) {
  for (UChar32 ch = 0; ch < kMaxCodepoint; ch++) {
    String s = FromUChar32(ch);
    s.Ensure16Bit();

    auto expected = MathMLOperatorDictionaryCategory::kNone;
    const bool in_remapped_block =
        ch >= kCombiningMinusSignBelow &&
        ch <= kGreekCapitalReversedDottedLunateSigmaSymbol;
    if (!in_remapped_block) {
      if (ch == kArabicMathematicalOperatorMeemWithHahWithTatweel ||
          ch == kArabicMathematicalOperatorHahWithDal) {
        expected = MathMLOperatorDictionaryCategory::kI;
      } else if (IsInCategory(category_e, ch)) {
        expected = MathMLOperatorDictionaryCategory::kDorEorL;
      } else if (IsInCategory(category_g, ch)) {
        expected = MathMLOperatorDictionaryCategory::kForG;
      } else if (IsInCategory(category_i, ch)) {
        expected = MathMLOperatorDictionaryCategory::kI;
      }
    }

    EXPECT_TRUE(FindCategory(s, MathMLOperatorDictionaryForm::kPostfix) ==
                expected);
  }
}
} // namespace blink
...@@ -50,9 +50,17 @@ const UChar kBulletCharacter = 0x2022; ...@@ -50,9 +50,17 @@ const UChar kBulletCharacter = 0x2022;
const UChar kBullseyeCharacter = 0x25CE; const UChar kBullseyeCharacter = 0x25CE;
const UChar32 kCancelTag = 0xE007F; const UChar32 kCancelTag = 0xE007F;
const UChar kCarriageReturnCharacter = 0x000D; const UChar kCarriageReturnCharacter = 0x000D;
const UChar kColon = 0x3A;
const UChar kCombiningEnclosingCircleBackslashCharacter = 0x20E0; const UChar kCombiningEnclosingCircleBackslashCharacter = 0x20E0;
const UChar kCombiningEnclosingKeycapCharacter = 0x20E3; const UChar kCombiningEnclosingKeycapCharacter = 0x20E3;
const UChar kCombiningLongSolidusOverlay = 0x0338;
const UChar kCombiningLongVerticalLineOverlay = 0x20D2;
const UChar kCombiningMinusSignBelow = 0x0320;
const UChar kComma = 0x2C;
const UChar kDeleteCharacter = 0x007F; const UChar kDeleteCharacter = 0x007F;
const UChar kDoubleStruckItalicCapitalDCharacter = 0x2145;
const UChar kDoubleStruckItalicSmallDCharacter = 0x2146;
const UChar kEnQuadCharacter = 0x2000;
const UChar kEthiopicPrefaceColonCharacter = 0x1366; const UChar kEthiopicPrefaceColonCharacter = 0x1366;
const UChar kEthiopicWordspaceCharacter = 0x1361; const UChar kEthiopicWordspaceCharacter = 0x1361;
const UChar kHeavyBlackHeartCharacter = 0x2764; const UChar kHeavyBlackHeartCharacter = 0x2764;
...@@ -66,7 +74,9 @@ const UChar32 kFamilyCharacter = 0x1F46A; ...@@ -66,7 +74,9 @@ const UChar32 kFamilyCharacter = 0x1F46A;
const UChar kFemaleSignCharacter = 0x2640; const UChar kFemaleSignCharacter = 0x2640;
const UChar kFirstStrongIsolateCharacter = 0x2068; const UChar kFirstStrongIsolateCharacter = 0x2068;
const UChar kFisheyeCharacter = 0x25C9; const UChar kFisheyeCharacter = 0x25C9;
const UChar kFourthRootCharacter = 0x221C;
const UChar kFullstopCharacter = 0x002E; const UChar kFullstopCharacter = 0x002E;
const UChar kGreekCapitalReversedDottedLunateSigmaSymbol = 0x03FF;
const UChar kGreekKappaSymbol = 0x03F0; const UChar kGreekKappaSymbol = 0x03F0;
const UChar kGreekLetterDigamma = 0x03DC; const UChar kGreekLetterDigamma = 0x03DC;
const UChar kGreekLowerAlpha = 0x03B1; const UChar kGreekLowerAlpha = 0x03B1;
...@@ -82,6 +92,7 @@ const UChar kGreekUpperOmega = 0x03A9; ...@@ -82,6 +92,7 @@ const UChar kGreekUpperOmega = 0x03A9;
const UChar kGreekUpperTheta = 0x03F4; const UChar kGreekUpperTheta = 0x03F4;
const UChar kHebrewPunctuationGereshCharacter = 0x05F3; const UChar kHebrewPunctuationGereshCharacter = 0x05F3;
const UChar kHebrewPunctuationGershayimCharacter = 0x05F4; const UChar kHebrewPunctuationGershayimCharacter = 0x05F4;
const UChar kHellschreiberPauseSymbol = 0x2BFF;
const UChar kHiraganaLetterSmallACharacter = 0x3041; const UChar kHiraganaLetterSmallACharacter = 0x3041;
const UChar kHoleGreekUpperTheta = 0x03A2; const UChar kHoleGreekUpperTheta = 0x03A2;
const UChar kHorizontalEllipsisCharacter = 0x2026; const UChar kHorizontalEllipsisCharacter = 0x2026;
...@@ -147,6 +158,7 @@ const UChar kRightToLeftEmbedCharacter = 0x202B; ...@@ -147,6 +158,7 @@ const UChar kRightToLeftEmbedCharacter = 0x202B;
const UChar kRightToLeftIsolateCharacter = 0x2067; const UChar kRightToLeftIsolateCharacter = 0x2067;
const UChar kRightToLeftMarkCharacter = 0x200F; const UChar kRightToLeftMarkCharacter = 0x200F;
const UChar kRightToLeftOverrideCharacter = 0x202E; const UChar kRightToLeftOverrideCharacter = 0x202E;
const UChar kSemiColon = 0x3B;
const UChar kSesameDotCharacter = 0xFE45; const UChar kSesameDotCharacter = 0xFE45;
const UChar kSmallLetterSharpSCharacter = 0x00DF; const UChar kSmallLetterSharpSCharacter = 0x00DF;
const UChar kSolidusCharacter = 0x002F; const UChar kSolidusCharacter = 0x002F;
...@@ -195,8 +207,16 @@ using WTF::unicode::kBulletCharacter; ...@@ -195,8 +207,16 @@ using WTF::unicode::kBulletCharacter;
using WTF::unicode::kBullseyeCharacter; using WTF::unicode::kBullseyeCharacter;
using WTF::unicode::kCancelTag; using WTF::unicode::kCancelTag;
using WTF::unicode::kCarriageReturnCharacter; using WTF::unicode::kCarriageReturnCharacter;
using WTF::unicode::kColon;
using WTF::unicode::kCombiningEnclosingCircleBackslashCharacter; using WTF::unicode::kCombiningEnclosingCircleBackslashCharacter;
using WTF::unicode::kCombiningEnclosingKeycapCharacter; using WTF::unicode::kCombiningEnclosingKeycapCharacter;
using WTF::unicode::kCombiningLongSolidusOverlay;
using WTF::unicode::kCombiningLongVerticalLineOverlay;
using WTF::unicode::kCombiningMinusSignBelow;
using WTF::unicode::kComma;
using WTF::unicode::kDoubleStruckItalicCapitalDCharacter;
using WTF::unicode::kDoubleStruckItalicSmallDCharacter;
using WTF::unicode::kEnQuadCharacter;
using WTF::unicode::kEthiopicPrefaceColonCharacter; using WTF::unicode::kEthiopicPrefaceColonCharacter;
using WTF::unicode::kEthiopicWordspaceCharacter; using WTF::unicode::kEthiopicWordspaceCharacter;
using WTF::unicode::kEyeCharacter; using WTF::unicode::kEyeCharacter;
...@@ -205,7 +225,9 @@ using WTF::unicode::kFemaleSignCharacter; ...@@ -205,7 +225,9 @@ using WTF::unicode::kFemaleSignCharacter;
using WTF::unicode::kFirstStrongIsolateCharacter; using WTF::unicode::kFirstStrongIsolateCharacter;
using WTF::unicode::kFisheyeCharacter; using WTF::unicode::kFisheyeCharacter;
using WTF::unicode::kFormFeedCharacter; using WTF::unicode::kFormFeedCharacter;
using WTF::unicode::kFourthRootCharacter;
using WTF::unicode::kFullstopCharacter; using WTF::unicode::kFullstopCharacter;
using WTF::unicode::kGreekCapitalReversedDottedLunateSigmaSymbol;
using WTF::unicode::kGreekKappaSymbol; using WTF::unicode::kGreekKappaSymbol;
using WTF::unicode::kGreekLetterDigamma; using WTF::unicode::kGreekLetterDigamma;
using WTF::unicode::kGreekLowerAlpha; using WTF::unicode::kGreekLowerAlpha;
...@@ -221,6 +243,7 @@ using WTF::unicode::kGreekUpperOmega; ...@@ -221,6 +243,7 @@ using WTF::unicode::kGreekUpperOmega;
using WTF::unicode::kGreekUpperTheta; using WTF::unicode::kGreekUpperTheta;
using WTF::unicode::kHebrewPunctuationGereshCharacter; using WTF::unicode::kHebrewPunctuationGereshCharacter;
using WTF::unicode::kHebrewPunctuationGershayimCharacter; using WTF::unicode::kHebrewPunctuationGershayimCharacter;
using WTF::unicode::kHellschreiberPauseSymbol;
using WTF::unicode::kHiraganaLetterSmallACharacter; using WTF::unicode::kHiraganaLetterSmallACharacter;
using WTF::unicode::kHoleGreekUpperTheta; using WTF::unicode::kHoleGreekUpperTheta;
using WTF::unicode::kHorizontalEllipsisCharacter; using WTF::unicode::kHorizontalEllipsisCharacter;
...@@ -287,6 +310,7 @@ using WTF::unicode::kRightToLeftEmbedCharacter; ...@@ -287,6 +310,7 @@ using WTF::unicode::kRightToLeftEmbedCharacter;
using WTF::unicode::kRightToLeftIsolateCharacter; using WTF::unicode::kRightToLeftIsolateCharacter;
using WTF::unicode::kRightToLeftMarkCharacter; using WTF::unicode::kRightToLeftMarkCharacter;
using WTF::unicode::kRightToLeftOverrideCharacter; using WTF::unicode::kRightToLeftOverrideCharacter;
using WTF::unicode::kSemiColon;
using WTF::unicode::kSesameDotCharacter; using WTF::unicode::kSesameDotCharacter;
using WTF::unicode::kSmallLetterSharpSCharacter; using WTF::unicode::kSmallLetterSharpSCharacter;
using WTF::unicode::kSoftHyphenCharacter; using WTF::unicode::kSoftHyphenCharacter;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment