Commit 4c57052c authored by Etienne Bergeron's avatar Etienne Bergeron Committed by Commit Bot

Replace PUA (Private Use Areas) codepoints with an alternative codepoint

The PUA (Private Use Areas) codepoints are not assigned by Unicode.
They can be used with a hardcoded font to display custom-made glyphs.
This feature is not used in Chrome and will lead to fallback fonts.

based on https://chromium-review.googlesource.com/c/chromium/src/+/829378
see http://crbug.com/795225


see: https://en.wikipedia.org/wiki/Private_Use_Areas
Bug: 1018378
Change-Id: I4b218336dba859eda2729bc9668d866e4ec03211
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1887531
Commit-Queue: Etienne Bergeron <etienneb@chromium.org>
Reviewed-by: Alexei Svitkine <asvitkine@chromium.org>
Reviewed-by: Dominik Röttsches <drott@chromium.org>
Cr-Commit-Position: refs/heads/master@{#711257}
parent 6d50efe5
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "cc/paint/paint_canvas.h" #include "cc/paint/paint_canvas.h"
#include "cc/paint/paint_shader.h" #include "cc/paint/paint_shader.h"
#include "third_party/icu/source/common/unicode/rbbi.h" #include "third_party/icu/source/common/unicode/rbbi.h"
#include "third_party/icu/source/common/unicode/uchar.h"
#include "third_party/icu/source/common/unicode/utf16.h" #include "third_party/icu/source/common/unicode/utf16.h"
#include "third_party/skia/include/core/SkDrawLooper.h" #include "third_party/skia/include/core/SkDrawLooper.h"
#include "third_party/skia/include/core/SkFontStyle.h" #include "third_party/skia/include/core/SkFontStyle.h"
...@@ -192,25 +193,45 @@ void RestoreBreakList(RenderText* render_text, BreakList<T>* break_list) { ...@@ -192,25 +193,45 @@ void RestoreBreakList(RenderText* render_text, BreakList<T>* break_list) {
} }
} }
// Replace the unicode control characters ISO 6429 (block C0) by their // Replace the codepoints not handled by RenderText by another compatible
// corresponsing visual symbols. Control chracters can't be displayed but // codepoint. Replace the unicode control characters ISO 6429 (block C0) by
// their visual symbols can. // their corresponding visual symbols. Control characters can't be displayed but
// their visual symbols can. Replace PUA (Private Use Areas) codepoints with the
// 'replacement character'.
void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) { void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) {
// 'REPLACEMENT CHARACTER' used to replace an unknown, unrecognized or
// unrepresentable character.
constexpr base::char16 kReplacementCodepoint = 0xFFFD;
// Control Pictures block (see: https://unicode.org/charts/PDF/U2400.pdf). // Control Pictures block (see: https://unicode.org/charts/PDF/U2400.pdf).
constexpr base::char16 kSymbolsCodepoint = 0x2400; constexpr base::char16 kSymbolsCodepoint = 0x2400;
for (size_t offset = 0; offset < text->size(); ++offset) {
base::char16 control_codepoint = (*text)[offset]; size_t next_offset = 0;
if (control_codepoint >= 0 && control_codepoint <= 0x1F) { while (next_offset < text->length()) {
size_t offset = next_offset;
UChar32 codepoint;
U16_NEXT(text->c_str(), next_offset, text->length(), codepoint);
if (codepoint >= 0 && codepoint <= 0x1F) {
// The newline character should be kept as-is when rendertext is // The newline character should be kept as-is when rendertext is
// multiline. // multiline.
if (control_codepoint == '\n' && multiline) if (codepoint == '\n' && multiline)
continue; continue;
// Replace codepoints with their visual symbols, which are at the same // Replace codepoints with their visual symbols, which are at the same
// offset from kSymbolsCodepoint. // offset from kSymbolsCodepoint.
(*text)[offset] = kSymbolsCodepoint + control_codepoint; (*text)[offset] = kSymbolsCodepoint + codepoint;
} else if (control_codepoint == 0x7F) { } else if (codepoint == 0x7F) {
// Replace the 'del' codepoint by its symbol (u2421). // Replace the 'del' codepoint by its symbol (u2421).
(*text)[offset] = kSymbolsCodepoint + 0x21; (*text)[offset] = kSymbolsCodepoint + 0x21;
} else if (codepoint > 0x7F) {
// Private use codepoints are working with a pair of font and codepoint,
// but they are not used in Chrome.
const int8_t codepoint_category = u_charType(codepoint);
if (codepoint_category == U_PRIVATE_USE_CHAR) {
(*text)[offset] = kReplacementCodepoint;
// We may need to replace the surrogate pair.
if (next_offset != offset + 1)
(*text)[offset + 1] = kReplacementCodepoint;
}
} }
} }
} }
......
...@@ -4530,6 +4530,16 @@ TEST_F(RenderTextTest, ControlCharacterReplacement) { ...@@ -4530,6 +4530,16 @@ TEST_F(RenderTextTest, ControlCharacterReplacement) {
EXPECT_EQ(WideToUTF16(L"␈␍␇␉\n␋␌"), render_text->GetDisplayText()); EXPECT_EQ(WideToUTF16(L"␈␍␇␉\n␋␌"), render_text->GetDisplayText());
} }
// Checks that private use codepoints set on a RenderText show up as U+FFFD
// ('replacement character') in the display text.
TEST_F(RenderTextTest, PrivateUseCharacterReplacement) {
  RenderText* const text_renderer = GetRenderText();

  // Two BMP private use codepoints (U+E78D, U+E78F) followed later by a
  // supplementary-plane one (U+100042), which UTF-16 encodes as a surrogate
  // pair.
  text_renderer->SetText(UTF8ToUTF16("xx\ue78d\ue78fa\U00100042z"));

  // Every private use codepoint must be replaced; the one stored as a
  // surrogate pair turns into two replacement characters, one per code unit.
  const auto expected_display = WideToUTF16(L"xx\ufffd\ufffda\ufffd\ufffdz");
  EXPECT_EQ(expected_display, text_renderer->GetDisplayText());
}
// Make sure the horizontal positions of runs in a line (left-to-right for // Make sure the horizontal positions of runs in a line (left-to-right for
// LTR languages and right-to-left for RTL languages). // LTR languages and right-to-left for RTL languages).
TEST_F(RenderTextTest, HarfBuzz_HorizontalPositions) { TEST_F(RenderTextTest, HarfBuzz_HorizontalPositions) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment