Commit 3b94fb21 authored by Koji Ishii's avatar Koji Ishii Committed by Commit Bot

Unify fast code path to |CaseMap::ToLower, ToUpper|

With this patch, |CaseMap::ToLower, ToUpper| can use existing
fast code path algorithms whenever possible, with or without
|TextOffsetMap|. Redundant code is unified, all full Unicode
case-mapping goes to |icu::CaseMap| API.

The dromaeo benchmark looks neutral, noisy from positive 5.4%
to negative 5.2%.
https://pinpoint-dot-chromeperf.appspot.com/job/123853bf5c0000
https://pinpoint-dot-chromeperf.appspot.com/job/15cfedf0dc0000

The new |TextOffsetMap| is not used in the actual code path
yet. Following patch will start using it to fix issue 926003.

Bug: 926003, 985201
Change-Id: I7ff0c2335e8dc61b64b629364978ceba6c8fac96
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1722830
Commit-Queue: Koji Ishii <kojii@chromium.org>
Reviewed-by: default avatarDominik Röttsches <drott@chromium.org>
Cr-Commit-Position: refs/heads/master@{#682180}
parent 40adbd62
......@@ -30,7 +30,7 @@ inline bool LocaleIdMatchesLang(const AtomicString& locale_id,
maybe_delimiter == '@';
}
enum class CaseMapType { kLower, kUpper, kLowerLegacy, kUpperLegacy };
enum class CaseMapType { kLower, kUpper };
scoped_refptr<StringImpl> CaseConvert(CaseMapType type,
StringImpl* source,
......@@ -51,42 +51,26 @@ scoped_refptr<StringImpl> CaseConvert(CaseMapType type,
while (true) {
UErrorCode status = U_ZERO_ERROR;
icu::Edits edits;
bool is_edits_valid = false;
switch (type) {
case CaseMapType::kLower:
target_length = icu::CaseMap::toLower(
locale, /* options */ 0,
reinterpret_cast<const char16_t*>(source16), source_length,
reinterpret_cast<char16_t*>(data16), target_length, &edits, status);
is_edits_valid = true;
break;
case CaseMapType::kUpper:
target_length = icu::CaseMap::toUpper(
locale, /* options */ 0,
reinterpret_cast<const char16_t*>(source16), source_length,
reinterpret_cast<char16_t*>(data16), target_length, &edits, status);
is_edits_valid = true;
break;
case CaseMapType::kLowerLegacy:
DCHECK(!offset_map);
target_length = u_strToLower(data16, target_length, source16,
source_length, locale, &status);
break;
case CaseMapType::kUpperLegacy:
DCHECK(!offset_map);
target_length = u_strToUpper(data16, target_length, source16,
source_length, locale, &status);
break;
}
if (U_SUCCESS(status)) {
if (is_edits_valid) {
if (!edits.hasChanges())
return source;
if (offset_map)
offset_map->Append(edits);
}
if (source_length == target_length)
return output;
......@@ -132,7 +116,7 @@ CaseMap::Locale::Locale(const AtomicString& locale) {
case_map_locale_ = nullptr;
}
scoped_refptr<StringImpl> CaseMap::FastToLowerInvariant(StringImpl* source) {
scoped_refptr<StringImpl> CaseMap::TryFastToLowerInvariant(StringImpl* source) {
DCHECK(source);
// Note: This is a hot function in the Dromaeo benchmark, specifically the
......@@ -196,27 +180,33 @@ scoped_refptr<StringImpl> CaseMap::FastToLowerInvariant(StringImpl* source) {
return new_impl;
}
// Do a slower implementation for cases that include non-ASCII characters.
UChar* data16;
scoped_refptr<StringImpl> new_impl =
StringImpl::CreateUninitialized(source->length(), data16);
// The fast code path was not able to handle this case.
return nullptr;
}
bool error;
int32_t real_length = unicode::ToLower(data16, length, source->Characters16(),
source->length(), &error);
if (!error && real_length == length)
return new_impl;
scoped_refptr<StringImpl> CaseMap::FastToLowerInvariant(StringImpl* source) {
// Note: This is a hot function in the Dromaeo benchmark.
DCHECK(source);
if (scoped_refptr<StringImpl> result = TryFastToLowerInvariant(source))
return result;
const char* locale = ""; // "" = root locale.
return CaseConvert(CaseMapType::kLower, source, locale);
}
new_impl = StringImpl::CreateUninitialized(real_length, data16);
unicode::ToLower(data16, real_length, source->Characters16(),
source->length(), &error);
if (error)
return source;
return new_impl;
scoped_refptr<StringImpl> CaseMap::ToLowerInvariant(StringImpl* source,
TextOffsetMap* offset_map) {
DCHECK(source);
DCHECK(!offset_map || offset_map->IsEmpty());
if (scoped_refptr<StringImpl> result = TryFastToLowerInvariant(source))
return result;
const char* locale = ""; // "" = root locale.
return CaseConvert(CaseMapType::kLower, source, locale, offset_map);
}
scoped_refptr<StringImpl> CaseMap::FastToUpperInvariant(StringImpl* source) {
scoped_refptr<StringImpl> CaseMap::ToUpperInvariant(StringImpl* source,
TextOffsetMap* offset_map) {
DCHECK(source);
DCHECK(!offset_map || offset_map->IsEmpty());
// This function could be optimized for no-op cases the way LowerUnicode() is,
// but in empirical testing, few actual calls to UpperUnicode() are no-ops, so
......@@ -277,6 +267,8 @@ scoped_refptr<StringImpl> CaseMap::FastToUpperInvariant(StringImpl* source) {
if (c == kSmallLetterSharpSCharacter) {
*dest++ = 'S';
*dest++ = 'S';
if (offset_map)
offset_map->Append(i + 1, dest - data8);
} else {
*dest++ = static_cast<LChar>(unicode::ToUpper(c));
}
......@@ -304,66 +296,44 @@ upconvert:
return new_impl;
// Do a slower implementation for cases that include non-ASCII characters.
bool error;
int32_t real_length =
unicode::ToUpper(data16, length, source16, source->length(), &error);
if (!error && real_length == length)
return new_impl;
new_impl = StringImpl::CreateUninitialized(real_length, data16);
unicode::ToUpper(data16, real_length, source16, source->length(), &error);
if (error)
return source;
return new_impl;
const char* locale = ""; // "" = root locale.
return CaseConvert(CaseMapType::kUpper, source, locale, offset_map);
}
scoped_refptr<StringImpl> CaseMap::ToLower(StringImpl* source) const {
if (!case_map_locale_)
return FastToLowerInvariant(source);
return CaseConvert(CaseMapType::kLowerLegacy, source, case_map_locale_);
}
scoped_refptr<StringImpl> CaseMap::ToLower(StringImpl* source,
TextOffsetMap* offset_map) const {
DCHECK(source);
DCHECK(!offset_map || offset_map->IsEmpty());
scoped_refptr<StringImpl> CaseMap::ToUpper(StringImpl* source) const {
if (!case_map_locale_)
return FastToUpperInvariant(source);
return CaseConvert(CaseMapType::kUpperLegacy, source, case_map_locale_);
return ToLowerInvariant(source, offset_map);
return CaseConvert(CaseMapType::kLower, source, case_map_locale_, offset_map);
}
String CaseMap::ToLower(const String& source) const {
if (StringImpl* impl = source.Impl())
return ToLower(impl);
return String();
}
scoped_refptr<StringImpl> CaseMap::ToUpper(StringImpl* source,
TextOffsetMap* offset_map) const {
DCHECK(source);
DCHECK(!offset_map || offset_map->IsEmpty());
String CaseMap::ToUpper(const String& source) const {
if (StringImpl* impl = source.Impl())
return ToUpper(impl);
return String();
if (!case_map_locale_)
return ToUpperInvariant(source, offset_map);
return CaseConvert(CaseMapType::kUpper, source, case_map_locale_, offset_map);
}
String CaseMap::ToLower(const String& source, TextOffsetMap* offset_map) const {
DCHECK(offset_map && offset_map->IsEmpty());
DCHECK(!offset_map || offset_map->IsEmpty());
if (UNLIKELY(source.IsEmpty()))
return source;
// TODO(kojii): Implement fast-path for simple cases.
return CaseConvert(CaseMapType::kLower, source.Impl(), case_map_locale_,
offset_map);
if (StringImpl* impl = source.Impl())
return ToLower(impl, offset_map);
return String();
}
String CaseMap::ToUpper(const String& source, TextOffsetMap* offset_map) const {
DCHECK(offset_map && offset_map->IsEmpty());
DCHECK(!offset_map || offset_map->IsEmpty());
if (UNLIKELY(source.IsEmpty()))
return source;
// TODO(kojii): Implement fast-path for simple cases.
return CaseConvert(CaseMapType::kUpper, source.Impl(), case_map_locale_,
offset_map);
if (StringImpl* impl = source.Impl())
return ToUpper(impl, offset_map);
return String();
}
} // namespace WTF
......@@ -43,14 +43,10 @@ class WTF_EXPORT CaseMap {
// Construct from a locale string. The given locale is normalized.
explicit CaseMap(const AtomicString& locale) : CaseMap(Locale(locale)) {}
String ToLower(const String& source) const;
String ToUpper(const String& source) const;
scoped_refptr<StringImpl> ToLower(StringImpl* source) const;
scoped_refptr<StringImpl> ToUpper(StringImpl* source) const;
String ToLower(const String& source, TextOffsetMap* offset_map) const;
String ToUpper(const String& source, TextOffsetMap* offset_map) const;
String ToLower(const String& source,
TextOffsetMap* offset_map = nullptr) const;
String ToUpper(const String& source,
TextOffsetMap* offset_map = nullptr) const;
// Fast code path for simple cases, only for root locale.
// TODO(crbug.com/627682): This should move to private, once
......@@ -58,8 +54,21 @@ class WTF_EXPORT CaseMap {
static scoped_refptr<StringImpl> FastToLowerInvariant(StringImpl* source);
private:
// Fast code path for simple cases, only for root locale.
static scoped_refptr<StringImpl> FastToUpperInvariant(StringImpl* source);
scoped_refptr<StringImpl> ToLower(StringImpl* source,
TextOffsetMap* offset_map = nullptr) const;
scoped_refptr<StringImpl> ToUpper(StringImpl* source,
TextOffsetMap* offset_map = nullptr) const;
// Fast code path for simple cases for root locale. When the fast code path is
// not possible, fallback to full Unicode casing using the root locale.
static scoped_refptr<StringImpl> ToLowerInvariant(StringImpl* source,
TextOffsetMap* offset_map);
static scoped_refptr<StringImpl> ToUpperInvariant(StringImpl* source,
TextOffsetMap* offset_map);
// Try the fast code path. Returns |nullptr| when the string cannot use the
// fast code path. The caller is responsible to fallback to full algorithm.
static scoped_refptr<StringImpl> TryFastToLowerInvariant(StringImpl* source);
const char* case_map_locale_;
};
......
......@@ -13,6 +13,19 @@ using testing::ElementsAreArray;
namespace WTF {
namespace {
String To8BitOrNull(const String& source) {
if (source.IsNull() || source.Is8Bit())
return source;
if (!source.ContainsOnlyLatin1OrEmpty())
return String();
return String::Make8BitFrom16BitSource(source.Characters16(),
source.length());
}
} // namespace
static struct CaseMapTestData {
const char16_t* source;
const char* locale;
......@@ -105,6 +118,32 @@ TEST_P(CaseMapTest, ToUpper) {
EXPECT_THAT(offset_map.Entries(), ElementsAreArray(data.upper_map));
}
TEST_P(CaseMapTest, ToLower8Bit) {
const auto data = GetParam();
String source(data.source);
source = To8BitOrNull(source);
if (!source)
return;
CaseMap case_map(data.locale);
TextOffsetMap offset_map;
String lower = case_map.ToLower(source, &offset_map);
EXPECT_EQ(lower, String(data.lower_expected));
EXPECT_THAT(offset_map.Entries(), ElementsAreArray(data.lower_map));
}
TEST_P(CaseMapTest, ToUpper8Bit) {
const auto data = GetParam();
String source(data.source);
source = To8BitOrNull(source);
if (!source)
return;
CaseMap case_map(data.locale);
TextOffsetMap offset_map;
String upper = case_map.ToUpper(source, &offset_map);
EXPECT_EQ(upper, String(data.upper_expected));
EXPECT_THAT(offset_map.Entries(), ElementsAreArray(data.upper_map));
}
struct CaseFoldingTestData {
const char* source_description;
const char* source;
......
......@@ -13,6 +13,12 @@ std::ostream& operator<<(std::ostream& stream,
return stream << "{" << entry.source << ", " << entry.target << "}";
}
void TextOffsetMap::Append(wtf_size_t source, wtf_size_t target) {
DCHECK(IsEmpty() ||
(source > entries_.back().source && target > entries_.back().target));
entries_.emplace_back(source, target);
}
void TextOffsetMap::Append(const icu::Edits& edits) {
DCHECK(IsEmpty());
......
......@@ -42,6 +42,7 @@ class WTF_EXPORT TextOffsetMap {
void Clear() { entries_.Shrink(0); }
void Append(wtf_size_t source, wtf_size_t target);
void Append(const icu::Edits& edits);
private:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment