Commit 3b94fb21 authored by Koji Ishii's avatar Koji Ishii Committed by Commit Bot

Unify fast code path to |CaseMap::ToLower, ToUpper|

With this patch, |CaseMap::ToLower, ToUpper| can use existing
fast code path algorithms whenever possible, with or without
|TextOffsetMap|. Redundant code is unified, all full Unicode
case-mapping goes to |icu::CaseMap| API.

The dromaeo benchmark looks neutral, noisy from positive 5.4%
to negative 5.2%.
https://pinpoint-dot-chromeperf.appspot.com/job/123853bf5c0000
https://pinpoint-dot-chromeperf.appspot.com/job/15cfedf0dc0000

The new |TextOffsetMap| is not used in the actual code path
yet. Following patch will start using it to fix issue 926003.

Bug: 926003, 985201
Change-Id: I7ff0c2335e8dc61b64b629364978ceba6c8fac96
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1722830
Commit-Queue: Koji Ishii <kojii@chromium.org>
Reviewed-by: default avatarDominik Röttsches <drott@chromium.org>
Cr-Commit-Position: refs/heads/master@{#682180}
parent 40adbd62
...@@ -30,7 +30,7 @@ inline bool LocaleIdMatchesLang(const AtomicString& locale_id, ...@@ -30,7 +30,7 @@ inline bool LocaleIdMatchesLang(const AtomicString& locale_id,
maybe_delimiter == '@'; maybe_delimiter == '@';
} }
enum class CaseMapType { kLower, kUpper, kLowerLegacy, kUpperLegacy }; enum class CaseMapType { kLower, kUpper };
scoped_refptr<StringImpl> CaseConvert(CaseMapType type, scoped_refptr<StringImpl> CaseConvert(CaseMapType type,
StringImpl* source, StringImpl* source,
...@@ -51,42 +51,26 @@ scoped_refptr<StringImpl> CaseConvert(CaseMapType type, ...@@ -51,42 +51,26 @@ scoped_refptr<StringImpl> CaseConvert(CaseMapType type,
while (true) { while (true) {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
icu::Edits edits; icu::Edits edits;
bool is_edits_valid = false;
switch (type) { switch (type) {
case CaseMapType::kLower: case CaseMapType::kLower:
target_length = icu::CaseMap::toLower( target_length = icu::CaseMap::toLower(
locale, /* options */ 0, locale, /* options */ 0,
reinterpret_cast<const char16_t*>(source16), source_length, reinterpret_cast<const char16_t*>(source16), source_length,
reinterpret_cast<char16_t*>(data16), target_length, &edits, status); reinterpret_cast<char16_t*>(data16), target_length, &edits, status);
is_edits_valid = true;
break; break;
case CaseMapType::kUpper: case CaseMapType::kUpper:
target_length = icu::CaseMap::toUpper( target_length = icu::CaseMap::toUpper(
locale, /* options */ 0, locale, /* options */ 0,
reinterpret_cast<const char16_t*>(source16), source_length, reinterpret_cast<const char16_t*>(source16), source_length,
reinterpret_cast<char16_t*>(data16), target_length, &edits, status); reinterpret_cast<char16_t*>(data16), target_length, &edits, status);
is_edits_valid = true;
break;
case CaseMapType::kLowerLegacy:
DCHECK(!offset_map);
target_length = u_strToLower(data16, target_length, source16,
source_length, locale, &status);
break;
case CaseMapType::kUpperLegacy:
DCHECK(!offset_map);
target_length = u_strToUpper(data16, target_length, source16,
source_length, locale, &status);
break; break;
} }
if (U_SUCCESS(status)) { if (U_SUCCESS(status)) {
if (is_edits_valid) { if (!edits.hasChanges())
if (!edits.hasChanges()) return source;
return source;
if (offset_map) if (offset_map)
offset_map->Append(edits); offset_map->Append(edits);
}
if (source_length == target_length) if (source_length == target_length)
return output; return output;
...@@ -132,7 +116,7 @@ CaseMap::Locale::Locale(const AtomicString& locale) { ...@@ -132,7 +116,7 @@ CaseMap::Locale::Locale(const AtomicString& locale) {
case_map_locale_ = nullptr; case_map_locale_ = nullptr;
} }
scoped_refptr<StringImpl> CaseMap::FastToLowerInvariant(StringImpl* source) { scoped_refptr<StringImpl> CaseMap::TryFastToLowerInvariant(StringImpl* source) {
DCHECK(source); DCHECK(source);
// Note: This is a hot function in the Dromaeo benchmark, specifically the // Note: This is a hot function in the Dromaeo benchmark, specifically the
...@@ -196,27 +180,33 @@ scoped_refptr<StringImpl> CaseMap::FastToLowerInvariant(StringImpl* source) { ...@@ -196,27 +180,33 @@ scoped_refptr<StringImpl> CaseMap::FastToLowerInvariant(StringImpl* source) {
return new_impl; return new_impl;
} }
// Do a slower implementation for cases that include non-ASCII characters. // The fast code path was not able to handle this case.
UChar* data16; return nullptr;
scoped_refptr<StringImpl> new_impl = }
StringImpl::CreateUninitialized(source->length(), data16);
bool error; scoped_refptr<StringImpl> CaseMap::FastToLowerInvariant(StringImpl* source) {
int32_t real_length = unicode::ToLower(data16, length, source->Characters16(), // Note: This is a hot function in the Dromaeo benchmark.
source->length(), &error); DCHECK(source);
if (!error && real_length == length) if (scoped_refptr<StringImpl> result = TryFastToLowerInvariant(source))
return new_impl; return result;
const char* locale = ""; // "" = root locale.
return CaseConvert(CaseMapType::kLower, source, locale);
}
new_impl = StringImpl::CreateUninitialized(real_length, data16); scoped_refptr<StringImpl> CaseMap::ToLowerInvariant(StringImpl* source,
unicode::ToLower(data16, real_length, source->Characters16(), TextOffsetMap* offset_map) {
source->length(), &error); DCHECK(source);
if (error) DCHECK(!offset_map || offset_map->IsEmpty());
return source; if (scoped_refptr<StringImpl> result = TryFastToLowerInvariant(source))
return new_impl; return result;
const char* locale = ""; // "" = root locale.
return CaseConvert(CaseMapType::kLower, source, locale, offset_map);
} }
scoped_refptr<StringImpl> CaseMap::FastToUpperInvariant(StringImpl* source) { scoped_refptr<StringImpl> CaseMap::ToUpperInvariant(StringImpl* source,
TextOffsetMap* offset_map) {
DCHECK(source); DCHECK(source);
DCHECK(!offset_map || offset_map->IsEmpty());
// This function could be optimized for no-op cases the way LowerUnicode() is, // This function could be optimized for no-op cases the way LowerUnicode() is,
// but in empirical testing, few actual calls to UpperUnicode() are no-ops, so // but in empirical testing, few actual calls to UpperUnicode() are no-ops, so
...@@ -277,6 +267,8 @@ scoped_refptr<StringImpl> CaseMap::FastToUpperInvariant(StringImpl* source) { ...@@ -277,6 +267,8 @@ scoped_refptr<StringImpl> CaseMap::FastToUpperInvariant(StringImpl* source) {
if (c == kSmallLetterSharpSCharacter) { if (c == kSmallLetterSharpSCharacter) {
*dest++ = 'S'; *dest++ = 'S';
*dest++ = 'S'; *dest++ = 'S';
if (offset_map)
offset_map->Append(i + 1, dest - data8);
} else { } else {
*dest++ = static_cast<LChar>(unicode::ToUpper(c)); *dest++ = static_cast<LChar>(unicode::ToUpper(c));
} }
...@@ -304,66 +296,44 @@ upconvert: ...@@ -304,66 +296,44 @@ upconvert:
return new_impl; return new_impl;
// Do a slower implementation for cases that include non-ASCII characters. // Do a slower implementation for cases that include non-ASCII characters.
bool error; const char* locale = ""; // "" = root locale.
int32_t real_length = return CaseConvert(CaseMapType::kUpper, source, locale, offset_map);
unicode::ToUpper(data16, length, source16, source->length(), &error);
if (!error && real_length == length)
return new_impl;
new_impl = StringImpl::CreateUninitialized(real_length, data16);
unicode::ToUpper(data16, real_length, source16, source->length(), &error);
if (error)
return source;
return new_impl;
} }
scoped_refptr<StringImpl> CaseMap::ToLower(StringImpl* source) const { scoped_refptr<StringImpl> CaseMap::ToLower(StringImpl* source,
if (!case_map_locale_) TextOffsetMap* offset_map) const {
return FastToLowerInvariant(source); DCHECK(source);
DCHECK(!offset_map || offset_map->IsEmpty());
return CaseConvert(CaseMapType::kLowerLegacy, source, case_map_locale_);
}
scoped_refptr<StringImpl> CaseMap::ToUpper(StringImpl* source) const {
if (!case_map_locale_) if (!case_map_locale_)
return FastToUpperInvariant(source); return ToLowerInvariant(source, offset_map);
return CaseConvert(CaseMapType::kLower, source, case_map_locale_, offset_map);
return CaseConvert(CaseMapType::kUpperLegacy, source, case_map_locale_);
} }
String CaseMap::ToLower(const String& source) const { scoped_refptr<StringImpl> CaseMap::ToUpper(StringImpl* source,
if (StringImpl* impl = source.Impl()) TextOffsetMap* offset_map) const {
return ToLower(impl); DCHECK(source);
return String(); DCHECK(!offset_map || offset_map->IsEmpty());
}
String CaseMap::ToUpper(const String& source) const { if (!case_map_locale_)
if (StringImpl* impl = source.Impl()) return ToUpperInvariant(source, offset_map);
return ToUpper(impl); return CaseConvert(CaseMapType::kUpper, source, case_map_locale_, offset_map);
return String();
} }
String CaseMap::ToLower(const String& source, TextOffsetMap* offset_map) const { String CaseMap::ToLower(const String& source, TextOffsetMap* offset_map) const {
DCHECK(offset_map && offset_map->IsEmpty()); DCHECK(!offset_map || offset_map->IsEmpty());
if (UNLIKELY(source.IsEmpty())) if (StringImpl* impl = source.Impl())
return source; return ToLower(impl, offset_map);
return String();
// TODO(kojii): Implement fast-path for simple cases.
return CaseConvert(CaseMapType::kLower, source.Impl(), case_map_locale_,
offset_map);
} }
String CaseMap::ToUpper(const String& source, TextOffsetMap* offset_map) const { String CaseMap::ToUpper(const String& source, TextOffsetMap* offset_map) const {
DCHECK(offset_map && offset_map->IsEmpty()); DCHECK(!offset_map || offset_map->IsEmpty());
if (UNLIKELY(source.IsEmpty())) if (StringImpl* impl = source.Impl())
return source; return ToUpper(impl, offset_map);
return String();
// TODO(kojii): Implement fast-path for simple cases.
return CaseConvert(CaseMapType::kUpper, source.Impl(), case_map_locale_,
offset_map);
} }
} // namespace WTF } // namespace WTF
...@@ -43,14 +43,10 @@ class WTF_EXPORT CaseMap { ...@@ -43,14 +43,10 @@ class WTF_EXPORT CaseMap {
// Construct from a locale string. The given locale is normalized. // Construct from a locale string. The given locale is normalized.
explicit CaseMap(const AtomicString& locale) : CaseMap(Locale(locale)) {} explicit CaseMap(const AtomicString& locale) : CaseMap(Locale(locale)) {}
String ToLower(const String& source) const; String ToLower(const String& source,
String ToUpper(const String& source) const; TextOffsetMap* offset_map = nullptr) const;
String ToUpper(const String& source,
scoped_refptr<StringImpl> ToLower(StringImpl* source) const; TextOffsetMap* offset_map = nullptr) const;
scoped_refptr<StringImpl> ToUpper(StringImpl* source) const;
String ToLower(const String& source, TextOffsetMap* offset_map) const;
String ToUpper(const String& source, TextOffsetMap* offset_map) const;
// Fast code path for simple cases, only for root locale. // Fast code path for simple cases, only for root locale.
// TODO(crbug.com/627682): This should move to private, once // TODO(crbug.com/627682): This should move to private, once
...@@ -58,8 +54,21 @@ class WTF_EXPORT CaseMap { ...@@ -58,8 +54,21 @@ class WTF_EXPORT CaseMap {
static scoped_refptr<StringImpl> FastToLowerInvariant(StringImpl* source); static scoped_refptr<StringImpl> FastToLowerInvariant(StringImpl* source);
private: private:
// Fast code path for simple cases, only for root locale. scoped_refptr<StringImpl> ToLower(StringImpl* source,
static scoped_refptr<StringImpl> FastToUpperInvariant(StringImpl* source); TextOffsetMap* offset_map = nullptr) const;
scoped_refptr<StringImpl> ToUpper(StringImpl* source,
TextOffsetMap* offset_map = nullptr) const;
// Fast code path for simple cases for root locale. When the fast code path is
// not possible, fallback to full Unicode casing using the root locale.
static scoped_refptr<StringImpl> ToLowerInvariant(StringImpl* source,
TextOffsetMap* offset_map);
static scoped_refptr<StringImpl> ToUpperInvariant(StringImpl* source,
TextOffsetMap* offset_map);
// Try the fast code path. Returns |nullptr| when the string cannot use the
// fast code path. The caller is responsible to fallback to full algorithm.
static scoped_refptr<StringImpl> TryFastToLowerInvariant(StringImpl* source);
const char* case_map_locale_; const char* case_map_locale_;
}; };
......
...@@ -13,6 +13,19 @@ using testing::ElementsAreArray; ...@@ -13,6 +13,19 @@ using testing::ElementsAreArray;
namespace WTF { namespace WTF {
namespace {
String To8BitOrNull(const String& source) {
if (source.IsNull() || source.Is8Bit())
return source;
if (!source.ContainsOnlyLatin1OrEmpty())
return String();
return String::Make8BitFrom16BitSource(source.Characters16(),
source.length());
}
} // namespace
static struct CaseMapTestData { static struct CaseMapTestData {
const char16_t* source; const char16_t* source;
const char* locale; const char* locale;
...@@ -105,6 +118,32 @@ TEST_P(CaseMapTest, ToUpper) { ...@@ -105,6 +118,32 @@ TEST_P(CaseMapTest, ToUpper) {
EXPECT_THAT(offset_map.Entries(), ElementsAreArray(data.upper_map)); EXPECT_THAT(offset_map.Entries(), ElementsAreArray(data.upper_map));
} }
TEST_P(CaseMapTest, ToLower8Bit) {
const auto data = GetParam();
String source(data.source);
source = To8BitOrNull(source);
if (!source)
return;
CaseMap case_map(data.locale);
TextOffsetMap offset_map;
String lower = case_map.ToLower(source, &offset_map);
EXPECT_EQ(lower, String(data.lower_expected));
EXPECT_THAT(offset_map.Entries(), ElementsAreArray(data.lower_map));
}
TEST_P(CaseMapTest, ToUpper8Bit) {
const auto data = GetParam();
String source(data.source);
source = To8BitOrNull(source);
if (!source)
return;
CaseMap case_map(data.locale);
TextOffsetMap offset_map;
String upper = case_map.ToUpper(source, &offset_map);
EXPECT_EQ(upper, String(data.upper_expected));
EXPECT_THAT(offset_map.Entries(), ElementsAreArray(data.upper_map));
}
struct CaseFoldingTestData { struct CaseFoldingTestData {
const char* source_description; const char* source_description;
const char* source; const char* source;
......
...@@ -13,6 +13,12 @@ std::ostream& operator<<(std::ostream& stream, ...@@ -13,6 +13,12 @@ std::ostream& operator<<(std::ostream& stream,
return stream << "{" << entry.source << ", " << entry.target << "}"; return stream << "{" << entry.source << ", " << entry.target << "}";
} }
void TextOffsetMap::Append(wtf_size_t source, wtf_size_t target) {
DCHECK(IsEmpty() ||
(source > entries_.back().source && target > entries_.back().target));
entries_.emplace_back(source, target);
}
void TextOffsetMap::Append(const icu::Edits& edits) { void TextOffsetMap::Append(const icu::Edits& edits) {
DCHECK(IsEmpty()); DCHECK(IsEmpty());
......
...@@ -42,6 +42,7 @@ class WTF_EXPORT TextOffsetMap { ...@@ -42,6 +42,7 @@ class WTF_EXPORT TextOffsetMap {
void Clear() { entries_.Shrink(0); } void Clear() { entries_.Shrink(0); }
void Append(wtf_size_t source, wtf_size_t target);
void Append(const icu::Edits& edits); void Append(const icu::Edits& edits);
private: private:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment