Commit 9176768a authored by drott@chromium.org

Upstream ScriptRunIterator for segmenting text runs by script

ScriptRunIterator takes a pointer to a UTF-16 text run and a starting
script value. Calling consume() on it then retrieves the limit and script
value of the next segmented script run. It takes care of matching brackets
when resolving script runs.

This functionality is needed for changing HarfBuzzShaper.cpp so that
we do not need to pre-split and store the TextRun into HarfBuzzRuns.
We can improve our script segmentation and integrate script splitting
and shaping in one loop.

Original code written by Doug Felt, big thanks!

BUG=526095
R=eae,behdad

Review URL: https://codereview.chromium.org/1323513006

git-svn-id: svn://svn.chromium.org/blink/trunk@201722 bbb929c8-8fbe-4397-9dbb-9b2b20218538
parent dd98ae68
......@@ -391,13 +391,15 @@
'fonts/GlyphPageTreeNode.cpp',
'fonts/GlyphPageTreeNode.h',
'fonts/Latin1TextIterator.h',
'fonts/UTF16TextIterator.cpp',
'fonts/UTF16TextIterator.h',
'fonts/ScriptRunIterator.h',
'fonts/ScriptRunIterator.cpp',
'fonts/SegmentedFontData.cpp',
'fonts/SegmentedFontData.h',
'fonts/SimpleFontData.cpp',
'fonts/SimpleFontData.h',
'fonts/TextBlob.h',
'fonts/UTF16TextIterator.cpp',
'fonts/UTF16TextIterator.h',
'fonts/VDMXParser.cpp',
'fonts/VDMXParser.h',
'fonts/android/FontCacheAndroid.cpp',
......@@ -961,6 +963,7 @@
'fonts/FontTest.cpp',
'fonts/GlyphBufferTest.cpp',
'fonts/GlyphPageTreeNodeTest.cpp',
'fonts/ScriptRunIteratorTest.cpp',
'fonts/android/FontCacheAndroidTest.cpp',
'fonts/mac/FontFamilyMatcherMacTest.mm',
'fonts/shaping/CachingWordShaperTest.cpp',
......
This diff is collapsed.
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef ScriptRunIterator_h
#define ScriptRunIterator_h
#include "platform/PlatformExport.h"
#include "wtf/Deque.h"
#include "wtf/Vector.h"
#include "wtf/dtoa/utils.h"
#include <unicode/uchar.h>
#include <unicode/uscript.h>
namespace blink {
class ScriptData;
// Segments a UTF-16 text run into script runs. Each call to consume() reports
// the limit and the script of the next run; brackets are matched while the
// script runs are resolved.
class PLATFORM_EXPORT ScriptRunIterator {
public:
// |text| points to the UTF-16 text run and |length| is its length
// (presumably counted in UTF-16 code units - confirm against the .cpp).
// NOTE(review): which ScriptData this overload uses is decided in the
// implementation file (presumably ICUScriptData::instance()) - confirm there.
ScriptRunIterator(const UChar* text, size_t length);
// This maintains a reference to data. It must exist for the lifetime of
// this object. Typically data is a singleton that exists for the life of
// the process.
ScriptRunIterator(const UChar* text, size_t length, const ScriptData*);
// Retrieves the next script run: |limit| receives the limit of the run and
// the UScriptCode argument receives its script.
// NOTE(review): the bool result presumably reports whether a run was
// produced (false once the text is exhausted) - confirm in the .cpp.
bool consume(unsigned& limit, UScriptCode&);
private:
// One entry of the bracket tracking buffer (m_brackets): a bracket character
// paired with a script.
struct BracketRec {
UChar32 ch;
UScriptCode script;
};
// Bracket bookkeeping helpers; both take the bracket character being handled.
void openBracket(UChar32);
void closeBracket(UChar32);
// NOTE(review): presumably merges the script sets below and relies on the
// priority order that ScriptData guarantees for getScripts() results; the
// meaning of the bool result is defined in the .cpp - confirm there.
bool mergeSets();
void fixupStack(UScriptCode resolvedScript);
// NOTE(review): presumably reads the next character into the UChar32
// out-parameter and its position into |pos| - confirm in the .cpp.
bool fetch(size_t* pos, UChar32*);
UScriptCode resolveCurrentScript() const;
// The text being segmented. Only a pointer is held, so the buffer is owned
// by the caller.
const UChar* m_text;
const size_t m_length;
Deque<BracketRec> m_brackets;
// NOTE(review): presumably the number of m_brackets entries that
// fixupStack() still has to update - confirm in the .cpp.
size_t m_bracketsFixupDepth;
// Limit max brackets so that the bracket tracking buffer does not grow
// excessively large when processing long runs of text.
static const int kMaxBrackets = 32;
// Script sets used while resolving a run.
// NOTE(review): presumably the candidates for the current run, the next
// character, and the look-ahead character respectively - confirm in the .cpp.
Vector<UScriptCode> m_currentSet;
Vector<UScriptCode> m_nextSet;
Vector<UScriptCode> m_aheadSet;
UChar32 m_aheadCharacter;
size_t m_aheadPos;
UScriptCode m_commonPreferred;
// Not owned; see the lifetime requirement on the three-argument constructor.
const ScriptData* m_scriptData;
// Instances cannot be copied or assigned.
DISALLOW_COPY_AND_ASSIGN(ScriptRunIterator);
};
// ScriptData is a wrapper which returns the set of scripts for a particular
// character, retrieved from the character's primary script and script
// extensions as per ICU / Unicode data. ScriptData maintains a certain
// priority order of the returned values, which is essential for the mergeSets
// method of ScriptRunIterator to work correctly.
//
// This is an abstract interface: all three query methods are pure virtual and
// the default constructor is protected, so only subclasses can be
// instantiated.
class PLATFORM_EXPORT ScriptData {
protected:
ScriptData() = default;
public:
// Declared here and defined out of line (not visible in this header).
virtual ~ScriptData();
// Result type of getPairedBracketType(). BracketTypeCount is the last
// enumerator, so by position it equals the number of enumerators before it.
enum PairedBracketType {
BracketTypeNone,
BracketTypeOpen,
BracketTypeClose,
BracketTypeCount
};
// NOTE(review): the value is defined outside this header; presumably an
// upper bound on the number of scripts getScripts() returns - confirm.
static const int kMaxScriptCount;
// Writes the scripts of the given character into |dst|, in the priority
// order described in the class comment.
virtual void getScripts(UChar32, Vector<UScriptCode>& dst) const = 0;
// NOTE(review): presumably returns the counterpart bracket of the given
// character; the result for non-bracket characters is implementation
// defined - confirm against the implementing class.
virtual UChar32 getPairedBracket(UChar32) const = 0;
// Classifies the given character as one of the PairedBracketType values.
virtual PairedBracketType getPairedBracketType(UChar32) const = 0;
private:
// Instances cannot be copied or assigned.
DISALLOW_COPY_AND_ASSIGN(ScriptData);
};
// Concrete ScriptData implementation that overrides all three query methods.
// The constructor is private, so instance() is the only public way to obtain
// an object of this class.
// NOTE(review): the name suggests the overrides are backed by ICU property
// lookups; they are defined in the .cpp - confirm there.
class PLATFORM_EXPORT ICUScriptData : public ScriptData {
public:
~ICUScriptData() override
{
}
// NOTE(review): presumably returns a process-wide singleton that is never
// destroyed - confirm in the .cpp. The returned pointer is const, so callers
// can only use the const query methods.
static const ICUScriptData* instance();
void getScripts(UChar32, Vector<UScriptCode>& dst) const override;
UChar32 getPairedBracket(UChar32) const override;
PairedBracketType getPairedBracketType(UChar32) const override;
private:
// Private so that construction is restricted to the class itself.
ICUScriptData()
{
}
// Instances cannot be copied or assigned.
DISALLOW_COPY_AND_ASSIGN(ICUScriptData);
};
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment