Commit 103f7f70 authored by kojii, committed by Commit bot

Refactor CachingWordShapeIterator::nextWordEndIndex

This patch refactors CachingWordShapeIterator::nextWordEndIndex in order to:

1. Improve readability.
2. Split the 3 cases into 3 separate loops, so that the loop in question
   can be identified from a stack trace.

BUG=639085

Review-Url: https://codereview.chromium.org/2263083002
Cr-Commit-Position: refs/heads/master@{#413463}
parent c6d602ef
...@@ -111,7 +111,7 @@ private: ...@@ -111,7 +111,7 @@ private:
return ch == spaceCharacter || ch == tabulationCharacter; return ch == spaceCharacter || ch == tabulationCharacter;
} }
unsigned nextWordEndIndex() unsigned nextWordEndIndex() const
{ {
const unsigned length = m_textRun.length(); const unsigned length = m_textRun.length();
if (m_startIndex >= length) if (m_startIndex >= length)
...@@ -120,47 +120,49 @@ private: ...@@ -120,47 +120,49 @@ private:
if (m_startIndex + 1u == length || isWordDelimiter(m_textRun[m_startIndex])) if (m_startIndex + 1u == length || isWordDelimiter(m_textRun[m_startIndex]))
return m_startIndex + 1; return m_startIndex + 1;
// Delimit every CJK character because these scripts do not delimit // 8Bit words end at isWordDelimiter().
// words by spaces, and not delimiting hits the performance. if (m_textRun.is8Bit()) {
if (!m_textRun.is8Bit()) { for (unsigned i = m_startIndex + 1; ; i++) {
UChar32 ch; if (i == length || isWordDelimiter(m_textRun[i]))
unsigned end = m_startIndex; return i;
U16_NEXT(m_textRun.characters16(), end, length, ch);
if (Character::isCJKIdeographOrSymbol(ch)) {
bool hasAnyScript = !Character::isCommonOrInheritedScript(ch);
for (unsigned i = end; i < length; end = i) {
U16_NEXT(m_textRun.characters16(), i, length, ch);
// ZWJ and modifier check in order not to split those Emoji sequences.
if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK)
|| ch == zeroWidthJoinerCharacter || Character::isModifier(ch))
continue;
// Avoid delimiting COMMON/INHERITED alone, which makes harder to
// identify the script.
if (Character::isCJKIdeographOrSymbol(ch)) {
if (Character::isCommonOrInheritedScript(ch))
continue;
if (!hasAnyScript) {
hasAnyScript = true;
continue;
}
}
return end;
}
return length;
} }
} }
for (unsigned i = m_startIndex + 1; ; i++) { // Non-CJK/Emoji words end at isWordDelimiter() or CJK/Emoji characters.
if (i == length || isWordDelimiter(m_textRun[i])) { unsigned end = m_startIndex;
return i; UChar32 ch = m_textRun.codepointAtAndNext(end);
if (!Character::isCJKIdeographOrSymbol(ch)) {
for (unsigned nextEnd = end; end < length; end = nextEnd) {
ch = m_textRun.codepointAtAndNext(nextEnd);
if (isWordDelimiter(ch) || Character::isCJKIdeographOrSymbolBase(ch))
return end;
} }
if (!m_textRun.is8Bit()) { return length;
UChar32 nextChar; }
U16_GET(m_textRun.characters16(), 0, i, length, nextChar);
if (Character::isCJKIdeographOrSymbolBase(nextChar)) // For CJK/Emoji words, delimit every character because these scripts do
return i; // not delimit words by spaces, and delimiting only at isWordDelimiter()
// worsens the cache efficiency.
bool hasAnyScript = !Character::isCommonOrInheritedScript(ch);
for (unsigned nextEnd = end; end < length; end = nextEnd) {
ch = m_textRun.codepointAtAndNext(nextEnd);
// ZWJ and modifier check in order not to split those Emoji sequences.
if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK)
|| ch == zeroWidthJoinerCharacter || Character::isModifier(ch))
continue;
// Avoid delimiting COMMON/INHERITED alone, which makes harder to
// identify the script.
if (Character::isCJKIdeographOrSymbol(ch)) {
if (Character::isCommonOrInheritedScript(ch))
continue;
if (!hasAnyScript) {
hasAnyScript = true;
continue;
}
} }
return end;
} }
return length;
} }
bool shapeToEndIndex(RefPtr<const ShapeResult>* result, unsigned endIndex) bool shapeToEndIndex(RefPtr<const ShapeResult>* result, unsigned endIndex)
...@@ -180,7 +182,7 @@ private: ...@@ -180,7 +182,7 @@ private:
return result->get(); return result->get();
} }
unsigned endIndexUntil(UChar ch) unsigned endIndexUntil(UChar ch) const
{ {
unsigned length = m_textRun.length(); unsigned length = m_textRun.length();
ASSERT(m_startIndex < length); ASSERT(m_startIndex < length);
......
...@@ -35,6 +35,8 @@ ...@@ -35,6 +35,8 @@
#include "wtf/text/StringView.h" #include "wtf/text/StringView.h"
#include "wtf/text/WTFString.h" #include "wtf/text/WTFString.h"
#include <unicode/utf16.h>
class SkTextBlob; class SkTextBlob;
namespace blink { namespace blink {
...@@ -150,6 +152,16 @@ public: ...@@ -150,6 +152,16 @@ public:
const LChar* characters8() const { ASSERT(is8Bit()); return m_data.characters8; } const LChar* characters8() const { ASSERT(is8Bit()); return m_data.characters8; }
const UChar* characters16() const { ASSERT(!is8Bit()); return m_data.characters16; } const UChar* characters16() const { ASSERT(!is8Bit()); return m_data.characters16; }
// Returns the code point that starts at index |i| and advances |i| so that
// it refers to the position following that code point.
//
// |i| is an in/out parameter: on entry it is the index to read from, on
// return it has been moved forward by the number of code units consumed.
UChar32 codepointAtAndNext(unsigned& i) const
{
    if (!is8Bit()) {
        // 16-bit data: check the index, then let ICU's U16_NEXT read the
        // code point at |i| (bounded by m_len) and advance |i| past it.
        SECURITY_DCHECK(i < m_len);
        UChar32 decoded;
        U16_NEXT(characters16(), i, m_len, decoded);
        return decoded;
    }
    // 8-bit data: read a single element through operator[] and step |i|
    // forward by one.
    return (*this)[i++];
}
bool is8Bit() const { return m_is8Bit; } bool is8Bit() const { return m_is8Bit; }
unsigned length() const { return m_len; } unsigned length() const { return m_len; }
unsigned charactersLength() const { return m_charactersLength; } unsigned charactersLength() const { return m_charactersLength; }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment